# base_scicat_dataset.yaml

# Schema identity. The name and id look like generator defaults
# (MySchema / example.org) — TODO confirm and replace with the real
# schema name and a resolvable URI before publishing.
name: MySchema
id: https://example.org/MySchema
imports:
- linkml:types
prefixes:
  # A prefix namespace must end in '/' (or '#'): CURIEs are expanded by plain
  # concatenation, so without the separator MySchema:owner would expand to
  # https://example.org/MySchemaowner.
  MySchema: https://example.org/MySchema/
  linkml: https://w3id.org/linkml/
default_prefix: MySchema
# Slots below that declare no explicit range fall back to this type.
default_range: string
# Enumerations referenced by slot ranges in this schema.
enums:
  # Allowed values of the `type` slot.
  type_options:
    permissible_values:
      # No per-value metadata is attached, hence the explicit empty mappings.
      raw: {}
      derived: {}
slots:
  # Owner/custodian name(s) as a single string; mandatory.
  owner:
    description: >-
      Owner or custodian of the dataset, usually first name + last name. The
      string may contain a list of persons, which should then be separated by
      semicolons.
    required: true
  # Email address(es) of the owner as a single string; no `required` flag set.
  ownerEmail:
    description: >-
      Email of the owner or custodian of the dataset. The string may contain a
      list of emails, which should then be separated by semicolons.
  # ORCID identifier(s) of the owner as a single string; no `required` flag set.
  orcidOfOwner:
    description: >-
      ORCID of the owner or custodian. The string may contain a list of ORCIDs,
      which should then be separated by semicolons.
  # Email address(es) of the dataset's contact person as a single string.
  contactEmail:
    description: >-
      Email of the contact person for this dataset. The string may contain a
      list of emails, which should then be separated by semicolons.
  # Directory on the file server that holds the dataset's files.
  sourceFolder:
    description: >-
      Absolute file path on file server containing the files of this dataset,
      e.g. /some/path/to/sourcefolder. In case of a single file dataset, e.g.
      HDF5 data, it contains the path up to, but excluding the filename.
      Trailing slashes are removed.
  #~ sourceFolderHost:
    #~ description: DNS host name of file server hosting sourceFolder, optionally including
      #~ a protocol e.g. [protocol://]fileserver1.example.com
  # Numeric total size of the source files.
  # NOTE(review): the unit is not stated here (presumably bytes) — confirm.
  size:
    range: float
    description: >-
      Total size of all source files contained in source folder on disk when
      unpacked.
  # User-supplied numeric rating of the dataset; no scale is defined here.
  dataQualityMetrics:
    description: >-
      Data Quality Metrics is a number given by the user to rate the dataset.
    range: float
  #~ packedSize:
    #~ description: Total size of all datablock package files created for this dataset.
    #~ range: float
  #~ numberOfFiles:
    #~ description: Total number of files in all OrigDatablocks for this dataset.
    #~ range: float
  #~ numberOfFilesArchived:
    #~ description: Total number of files in all Datablocks for this dataset.
    #~ range: float
    #~ required: true
  # Timestamp kept as a string (default_range); the description asks for
  # RFC 3339 formatting.
  creationTime:
    description: >-
      Time when dataset became fully available on disk, i.e. all containing
      files have been written. Format according to chapter 5.6 internet
      date/time format in RFC 3339. Local times without timezone/offset info
      are automatically transformed to UTC using the timezone of the API
      server.
  # Mandatory dataset kind, restricted to the values of the type_options enum.
  type:
    required: true
    range: type_options
    description: >-
      Characterize type of dataset, either 'raw' or 'derived'. Autofilled when
      choosing the proper inherited models.
  #~ validationStatus:
    #~ description: Defines a level of trust, e.g. a measure of how much data was verified
      #~ or used by other persons.
  # List-valued slot; each entry is a string (default_range).
  # Description follows the wording of the SciCat dataset model, since this
  # slot previously had none — TODO confirm against the target SciCat release.
  keywords:
    description: >-
      Array of tags associated with the meaning or contents of this dataset.
      Values should ideally come from defined vocabularies, taxonomies,
      ontologies or knowledge graphs.
    multivalued: true
  # This slot is itself named "description"; the nested key below is LinkML's
  # own description field documenting it.
  description:
    description: >-
      Free text explanation of contents of dataset.
  # Human-readable label for the dataset.
  datasetName:
    description: >-
      A name for the dataset, given by the creator to carry some semantic
      meaning. Useful for display purposes e.g. instead of displaying the pid.
      Will be autofilled if missing using info from sourceFolder.
  #~ classification:
    #~ description: ACIA information about AUthenticity,COnfidentiality,INtegrity and
      #~ AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger
      #~ the creation of a two tape copies. Format 'AV=medium,CO=low'
  # License name as free text (string via default_range).
  license:
    description: >-
      Name of the license under which the data can be used.
  #~ version:
    #~ description: Version of the API used in creation of the dataset.
  #~ isPublished:
    #~ description: Flag is true when data are made publicly available.
    #~ range: boolean
  # List-valued slot whose entries are TechniqueClass instances.
  # NOTE(review): TechniqueClass is not defined in the visible part of this
  # schema and is not provided by linkml:types — confirm it is supplied by a
  # schema that imports or is merged with this one.
  techniques:
    description: >-
      Stores the metadata information for techniques.
    multivalued: true
    range: TechniqueClass
  #~ sharedWith:
    #~ multivalued: true
  #~ relationships:
    #~ multivalued: true
    #~ range: RelationshipClass
  # Declared abstract.
  # NOTE(review): presumably concrete schemas are expected to specialise this
  # slot with a structured definition — confirm.
  scientificMetadata:
    abstract: true
    description: >-
      JSON object containing the scientific metadata.
  # Name(s) of the principal investigator(s) as a single string.
  # NOTE(review): the description calls this required only for Raw datasets,
  # yet `required: true` applies unconditionally — confirm which is intended.
  principalInvestigator:
    required: true
    description: >-
      First name and last name of principal investigator(s). If multiple PIs
      are present, use a semicolon separated list. This field is required if
      the dataset is a Raw dataset.
  # Timestamp kept as a string (default_range); the description asks for
  # RFC 3339 formatting.
  endTime:
    description: >-
      End time of data acquisition for this dataset, format according to
      chapter 5.6 internet date/time format in RFC 3339. Local times without
      timezone/offset info are automatically transformed to UTC using the
      timezone of the API server.
  # Identifier of the place where the data was taken.
  # NOTE(review): described as required for Raw datasets, but no `required`
  # flag is set here — confirm whether a derived schema enforces it.
  creationLocation:
    description: >-
      Unique location identifier where data was taken, usually in the form
      /Site-name/facility-name/instrumentOrBeamline-name. This field is
      required if the dataset is a Raw dataset.
  # Free-text name (and version) of the file format used by the dataset.
  dataFormat:
    description: >-
      Defines the format of the data files in this dataset, e.g. Nexus Version
      x.y.
  # Reference to the owning proposal, stored as a plain string identifier.
  proposalId:
    description: >-
      The ID of the proposal to which the dataset belongs.
  #~ sampleId:
    #~ description: ID of the sample used when collecting the data.
  # Reference to the producing instrument, stored as a plain string identifier.
  instrumentId:
    description: >-
      ID of the instrument where the data was created.
  #~ ownerGroup: {}
  #~ accessGroups: {}
  #~ instrumentGroup: {}
classes:
  # Abstract base class gathering the dataset slots declared under `slots:`.
  # Being abstract, it is meant to be specialised rather than instantiated
  # directly. Entries prefixed with `#~` are slots that are currently disabled;
  # they are kept in place to show where they would sit in the list.
  ScicatDataset:
    abstract: true
    slots:
    - owner
    - ownerEmail
    - orcidOfOwner
    - contactEmail
    - sourceFolder
    #~ - sourceFolderHost
    - size
    #~ - packedSize
    #~ - numberOfFiles
    #~ - numberOfFilesArchived
    - creationTime
    - type
    #~ - validationStatus
    - keywords
    - description
    - datasetName
    #~ - classification
    - license
    #~ - version
    #~ - isPublished
    - techniques
    #~ - sharedWith
    #~ - relationships
    - scientificMetadata
    - principalInvestigator
    - endTime
    - creationLocation
    - dataFormat
    - proposalId
    #~ - sampleId       # not in use here since it refers to cici
    - instrumentId
    - dataQualityMetrics
    #~ - ownerGroup
    #~ - accessGroups
    #~ - instrumentGroup
    #~ slot_usage:
      #~ owner:
        #~ description: Owner or custodian of the dataset, usually first name + last
          #~ name. The string may contain a list of persons, which should then be seperated
          #~ by semicolons.
        #~ required: true
      #~ ownerEmail:
        #~ description: Email of the owner or custodian of the dataset. The string may
          #~ contain a list of emails, which should then be seperated by semicolons.
      #~ orcidOfOwner:
        #~ description: ORCID of the owner or custodian. The string may contain a list
          #~ of ORCIDs, which should then be separated by semicolons.
      #~ contactEmail:
        #~ description: Email of the contact person for this dataset. The string may
          #~ contain a list of emails, which should then be seperated by semicolons.
        #~ required: true
      #~ sourceFolder:
        #~ description: Absolute file path on file server containing the files of this
          #~ dataset, e.g. /some/path/to/sourcefolder. In case of a single file dataset,
          #~ e.g. HDF5 data, it contains the path up to, but excluding the filename.
          #~ Trailing slashes are removed.
        #~ required: true
      #~ sourceFolderHost:
        #~ description: DNS host name of file server hosting sourceFolder, optionally
          #~ including a protocol e.g. [protocol://]fileserver1.example.com
      #~ size:
        #~ description: Total size of all source files contained in source folder on
          #~ disk when unpacked.
        #~ range: float
      #~ packedSize:
        #~ description: Total size of all datablock package files created for this dataset.
        #~ range: float
      #~ numberOfFiles:
        #~ description: Total number of files in all OrigDatablocks for this dataset.
        #~ range: float
      #~ numberOfFilesArchived:
        #~ description: Total number of files in all Datablocks for this dataset.
        #~ range: float
        #~ required: true
      #~ creationTime:
        #~ description: Time when dataset became fully available on disk, i.e. all containing
          #~ files have been written. Format according to chapter 5.6 internet date/time
          #~ format in RFC 3339. Local times without timezone/offset info are automatically
          #~ transformed to UTC using the timezone of the API server.
        #~ required: true
      #~ type:
        #~ description: Characterize type of dataset, either 'raw' or 'derived'. Autofilled
          #~ when choosing the proper inherited models.
        #~ range: type_options
        #~ required: true
      #~ validationStatus:
        #~ description: Defines a level of trust, e.g. a measure of how much data was
          #~ verified or used by other persons.
      #~ keywords:
        #~ multivalued: true
      #~ description:
        #~ description: Free text explanation of contents of dataset.
      #~ datasetName:
        #~ description: A name for the dataset, given by the creator to carry some semantic
          #~ meaning. Useful for display purposes e.g. instead of displaying the pid.
          #~ Will be autofilled if missing using info from sourceFolder.
      #~ classification:
        #~ description: ACIA information about AUthenticity,COnfidentiality,INtegrity
          #~ and AVailability requirements of dataset. E.g. AV(ailabilty)=medium could
          #~ trigger the creation of a two tape copies. Format 'AV=medium,CO=low'
      #~ license:
        #~ description: Name of the license under which the data can be used.
      #~ version:
        #~ description: Version of the API used in creation of the dataset.
      #~ isPublished:
        #~ description: Flag is true when data are made publicly available.
        #~ range: boolean
      #~ techniques:
        #~ multivalued: true
        #~ range: TechniqueClass
      #~ sharedWith:
        #~ multivalued: true
      #~ relationships:
        #~ multivalued: true
        #~ range: RelationshipClass
      #~ scientificMetadata:
        #~ description: JSON object containing the scientific metadata.
      #~ principalInvestigator:
        #~ description: First name and last name of principal investigator(s). If multiple
          #~ PIs are present, use a semicolon separated list. This field is required
          #~ if the dataset is a Raw dataset.
        #~ required: true
      #~ endTime:
        #~ description: End time of data acquisition for this dataset, format according
          #~ to chapter 5.6 internet date/time format in RFC 3339. Local times without
          #~ timezone/offset info are automatically transformed to UTC using the timezone
          #~ of the API server.
      #~ creationLocation:
        #~ description: Unique location identifier where data was taken, usually in the
          #~ form /Site-name/facility-name/instrumentOrBeamline-name. This field is required
          #~ if the dataset is a Raw dataset.
        #~ required: true
      #~ dataFormat:
        #~ description: Defines the format of the data files in this dataset, e.g Nexus
          #~ Version x.y.
      #~ proposalId:
        #~ description: The ID of the proposal to which the dataset belongs.
      #~ sampleId:
        #~ description: ID of the sample used when collecting the data.
      #~ instrumentId:
        #~ description: ID of the instrument where the data was created.
      #~ ownerGroup:
        #~ required: true
      #~ accessGroups:
        #~ multivalued: true
      #~ instrumentGroup: {}