# base_scicat_dataset.yaml

---
# LinkML schema header: schema name, identifier, imported type library and
# the prefix map used to expand CURIEs.
name: SciCatDatasetSchema
id: https://example.org/base_scicat
imports: [linkml:types]
prefixes:
  MySchema: https://example.org/MySchema
  linkml: https://w3id.org/linkml/
# NOTE(review): default_prefix points at the `linkml` prefix rather than
# `MySchema`, so elements of this schema would presumably be placed under
# https://w3id.org/linkml/ — confirm this is intended.
default_prefix: linkml
# Attributes that do not declare a `range` fall back to string.
default_range: string
enums:
  # Value set used as the range of the `type` attribute of ScicatDataset.
  type_options:
    permissible_values:
      raw:
      derived:
classes:
  # Abstract base class collecting the dataset attributes; the description of
  # `type` refers to inherited models that build on it.
  # Lines prefixed with "#~" are attributes that are currently commented out.
  ScicatDataset:
    abstract: true
    attributes:
      # --- Ownership and contact ---
      owner:
        description: Owner or custodian of the dataset, usually first name + last
          name. The string may contain a list of persons, which should then be separated
          by semicolons.
        required: true
      ownerEmail:
        description: Email of the owner or custodian of the dataset. The string may
          contain a list of emails, which should then be separated by semicolons.
      orcidOfOwner:
        description: ORCID of the owner or custodian. The string may contain a list
          of ORCIDs, which should then be separated by semicolons.
      contactEmail:
        description: Email of the contact person for this dataset. The string may
          contain a list of emails, which should then be separated by semicolons.
      # --- Storage location and size ---
      sourceFolder:
        description: Absolute file path on file server containing the files of this
          dataset, e.g. /some/path/to/sourcefolder. In case of a single file dataset,
          e.g. HDF5 data, it contains the path up to, but excluding the filename.
          Trailing slashes are removed.
      #~ sourceFolderHost:
        #~ description: DNS host name of file server hosting sourceFolder, optionally including
          #~ a protocol e.g. [protocol://]fileserver1.example.com
      size:
        description: Total size of all source files contained in source folder on
          disk when unpacked.
        range: float
      dataQualityMetrics:
        range: float
        description: Data Quality Metrics is a number given by the user to rate the
          dataset.
      #~ packedSize:
        #~ description: Total size of all datablock package files created for this dataset.
        #~ range: float
      #~ numberOfFiles:
        #~ description: Total number of files in all OrigDatablocks for this dataset.
        #~ range: float
      #~ numberOfFilesArchived:
        #~ description: Total number of files in all Datablocks for this dataset.
        #~ range: float
        #~ required: true
      # No explicit range is declared, so the schema-level default_range
      # (string) applies; the RFC 3339 format described below is not expressed
      # as a schema constraint.
      creationTime:
        description: Time when dataset became fully available on disk, i.e. all containing
          files have been written. Format according to chapter 5.6 internet date/time
          format in RFC 3339. Local times without timezone/offset info are automatically
          transformed to UTC using the timezone of the API server.
      # --- Classification and descriptive metadata ---
      type:
        description: Characterize type of dataset, either 'raw' or 'derived'. Autofilled
          when choosing the proper inherited models.
        range: type_options
        required: true
      #~ validationStatus:
        #~ description: Defines a level of trust, e.g. a measure of how much data was verified
          #~ or used by other persons.
      keywords:
        multivalued: true
      description:
        description: Free text explanation of contents of dataset.
      datasetName:
        description: A name for the dataset, given by the creator to carry some semantic
          meaning. Useful for display purposes e.g. instead of displaying the pid.
          Will be autofilled if missing using info from sourceFolder.
      #~ classification:
        #~ description: ACIA information about AUthenticity,COnfidentiality,INtegrity and
          #~ AVailability requirements of dataset. E.g. AV(ailabilty)=medium could trigger
          #~ the creation of a two tape copies. Format 'AV=medium,CO=low'
      license:
        description: Name of the license under which the data can be used.
      #~ version:
        #~ description: Version of the API used in creation of the dataset.
      #~ isPublished:
        #~ description: Flag is true when data are made publicly available.
        #~ range: boolean
      # NOTE(review): TechniqueClass is not defined in the visible part of this
      # schema — presumably it is provided by another schema file; confirm.
      techniques:
        multivalued: true
        range: TechniqueClass
      #~ sharedWith:
        #~ multivalued: true
      #~ relationships:
        #~ multivalued: true
        #~ range: RelationshipClass
      # NOTE(review): `abstract: true` on an attribute — presumably meant to be
      # specialised by the inherited models; confirm.
      scientificMetadata:
        description: JSON object containing the scientific metadata.
        abstract: true
      # --- Acquisition details ---
      # NOTE(review): marked required for every dataset here, while the
      # description says it is only required for Raw datasets — confirm which
      # of the two is intended.
      principalInvestigator:
        description: First name and last name of principal investigator(s). If multiple
          PIs are present, use a semicolon separated list. This field is required
          if the dataset is a Raw dataset.
        required: true
      # As for creationTime: no explicit range, so the value is a plain string.
      endTime:
        description: End time of data acquisition for this dataset, format according
          to chapter 5.6 internet date/time format in RFC 3339. Local times without
          timezone/offset info are automatically transformed to UTC using the timezone
          of the API server.
      # NOTE(review): the description calls this required for Raw datasets, but
      # no `required` flag is set on this attribute — confirm.
      creationLocation:
        description: Unique location identifier where data was taken, usually in the
          form /Site-name/facility-name/instrumentOrBeamline-name. This field is required
          if the dataset is a Raw dataset.
      dataFormat:
        description: Defines the format of the data files in this dataset, e.g. Nexus
          Version x.y.
      proposalId:
        description: The ID of the proposal to which the dataset belongs.
      #~ sampleId:
        #~ description: ID of the sample used when collecting the data.
      instrumentId:
        description: ID of the instrument where the data was created.
      #~ ownerGroup: {}
      #~ accessGroups: {}
      #~ instrumentGroup: {}