diff --git a/project/jsonld/data_sheets_schema.jsonld b/project/jsonld/data_sheets_schema.jsonld index 5b765ea8..0c051caf 100644 --- a/project/jsonld/data_sheets_schema.jsonld +++ b/project/jsonld/data_sheets_schema.jsonld @@ -19,7 +19,8 @@ "D4D_Ethics", "D4D_Human", "D4D_Data_Governance", - "D4D_Variables" + "D4D_Variables", + "D4D_FileCollection" ], "license": "MIT", "prefixes": [ @@ -31,6 +32,10 @@ "prefix_prefix": "biolink", "prefix_reference": "https://w3id.org/biolink/vocab/" }, + { + "prefix_prefix": "d4d", + "prefix_reference": "https://w3id.org/bridge2ai/data-sheets-schema/" + }, { "prefix_prefix": "data_sheets_schema", "prefix_reference": "https://w3id.org/bridge2ai/data-sheets-schema/" @@ -1412,6 +1417,117 @@ ] } ] + }, + { + "name": "FileTypeEnum", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection/FileTypeEnum", + "description": "Types of individual files within datasets.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "permissible_values": [ + { + "text": "data_file", + "description": "A data file containing dataset content", + "meaning": "schema:DataDownload" + }, + { + "text": "code_file", + "description": "A source code or script file", + "meaning": "schema:SoftwareSourceCode" + }, + { + "text": "documentation_file", + "description": "A documentation file (README, guide, etc.)", + "meaning": "schema:Documentation" + }, + { + "text": "metadata_file", + "description": "A metadata or annotation file", + "meaning": "dcat:CatalogRecord" + }, + { + "text": "configuration_file", + "description": "A configuration or settings file", + "meaning": "d4d:ConfigurationFile" + }, + { + "text": "notebook_file", + "description": "A computational notebook file (Jupyter, R Markdown, etc.)", + "meaning": "d4d:NotebookFile" + }, + { + "text": "image_file", + "description": "An image or visualization file", + "meaning": "schema:ImageObject" + }, + { + "text": "archive_file", + "description": "An 
archive or compressed file", + "meaning": "d4d:ArchiveFile" + }, + { + "text": "other", + "description": "Other file type", + "meaning": "d4d:OtherFile" + } + ] + }, + { + "name": "FileCollectionTypeEnum", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection/FileCollectionTypeEnum", + "description": "Types of file collections within datasets.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "permissible_values": [ + { + "text": "raw_data", + "description": "Raw, unprocessed data files", + "meaning": "d4d:RawData" + }, + { + "text": "processed_data", + "description": "Cleaned, processed, or transformed data files", + "meaning": "d4d:ProcessedData" + }, + { + "text": "training_split", + "description": "Files designated for model training", + "meaning": "d4d:TrainingSplit" + }, + { + "text": "test_split", + "description": "Files designated for model testing", + "meaning": "d4d:TestSplit" + }, + { + "text": "validation_split", + "description": "Files designated for model validation", + "meaning": "d4d:ValidationSplit" + }, + { + "text": "documentation", + "description": "Documentation files (README, codebook, etc.)", + "meaning": "schema:Documentation" + }, + { + "text": "metadata", + "description": "Metadata or annotation files", + "meaning": "dcat:CatalogRecord" + }, + { + "text": "code", + "description": "Code or script files", + "meaning": "schema:SoftwareSourceCode" + }, + { + "text": "supplementary", + "description": "Supplementary materials", + "meaning": "schema:SupplementalMaterial" + }, + { + "text": "other", + "description": "Other file collection type", + "meaning": "d4d:OtherFileCollection" + } + ] } ], "slots": [ @@ -1529,11 +1645,15 @@ { "name": "dialect", "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dialect", + "description": "Specific format dialect or variation (e.g., CSV dialect, JSON-LD profile).", "from_schema": 
"https://w3id.org/bridge2ai/data-sheets-schema/base", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dialect", - "owner": "Dataset", + "mappings": [ + "http://schema.org/encodingFormat" + ], + "slot_uri": "http://schema.org/encodingFormat", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "string", "@type": "SlotDefinition" @@ -1547,9 +1667,9 @@ "http://www.w3.org/ns/dcat#byteSize" ], "slot_uri": "http://www.w3.org/ns/dcat#byteSize", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "integer", "@type": "SlotDefinition" @@ -1562,9 +1682,10 @@ "http://schema.org/contentUrl" ], "slot_uri": "http://schema.org/contentUrl", - "owner": "Dataset", + "owner": "FileCollection", "domain_of": [ - "Dataset" + "File", + "FileCollection" ], "range": "string", "@type": "SlotDefinition" @@ -1597,9 +1718,9 @@ "http://purl.org/dc/terms/format" ], "slot_uri": "http://purl.org/dc/terms/format", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "FormatEnum", "@type": "SlotDefinition" @@ -1613,9 +1734,9 @@ "http://www.w3.org/ns/dcat#mediaType" ], "slot_uri": "http://www.w3.org/ns/dcat#mediaType", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "EncodingEnum", "@type": "SlotDefinition" @@ -1629,9 +1750,11 @@ "http://www.w3.org/ns/dcat#compressFormat" ], "slot_uri": "http://www.w3.org/ns/dcat#compressFormat", - "owner": "Information", + "owner": "FileCollection", "domain_of": [ - "Information" + "Information", + "File", + "FileCollection" ], "range": "CompressionEnum", "@type": "SlotDefinition" @@ -1648,9 +1771,9 @@ "http://schema.org/encodingFormat" ], "slot_uri": "http://www.w3.org/ns/dcat#mediaType", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "MediaTypeEnum", "@type": "SlotDefinition" @@ -1664,9 +1787,9 @@ "http://purl.org/dc/terms/identifier" ], "slot_uri": 
"http://purl.org/dc/terms/identifier", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "string", "@type": "SlotDefinition" @@ -1680,9 +1803,9 @@ "http://purl.org/dc/terms/identifier" ], "slot_uri": "http://purl.org/dc/terms/identifier", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "string", "@type": "SlotDefinition" @@ -1696,9 +1819,9 @@ "http://purl.org/dc/terms/identifier" ], "slot_uri": "http://purl.org/dc/terms/identifier", - "owner": "Dataset", + "owner": "File", "domain_of": [ - "Dataset" + "File" ], "range": "string", "@type": "SlotDefinition" @@ -1913,10 +2036,11 @@ "http://purl.org/dc/terms/references" ], "slot_uri": "http://purl.org/dc/terms/references", - "owner": "ExternalResource", + "owner": "FileCollection", "domain_of": [ "Dataset", - "ExternalResource" + "ExternalResource", + "FileCollection" ], "range": "string", "multivalued": true, @@ -1925,21 +2049,82 @@ { "name": "resources", "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", - "description": "Sub-resources or component datasets. Used in DatasetCollection to contain Dataset objects, and in Dataset to allow nested resource structures.", + "description": "Sub-resources or component items. In DatasetCollection, contains Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, contains nested FileCollection objects. 
The specific range is defined via slot_usage in each class.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", - "owner": "Dataset", + "mappings": [ + "http://schema.org/hasPart" + ], + "slot_uri": "http://schema.org/hasPart", + "owner": "FileCollection", "domain_of": [ "DatasetCollection", - "Dataset" + "Dataset", + "FileCollection" ], "range": "Dataset", "multivalued": true, "@type": "SlotDefinition" }, + { + "name": "dataset__file_collections", + "description": "Collections of files within this dataset. Each collection represents a logical grouping of files with shared characteristics (e.g., all training data, all image files, all raw data files). Maps to nested RO-Crate Dataset entities via schema:hasPart.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "http://schema.org/hasPart" + ], + "exact_mappings": [ + "http://www.w3.org/ns/dcat#distribution" + ], + "slot_uri": "http://schema.org/hasPart", + "alias": "file_collections", + "owner": "Dataset", + "domain_of": [ + "Dataset" + ], + "range": "FileCollection", + "multivalued": true, + "inlined": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, + { + "name": "dataset__total_file_count", + "description": "Total number of files across all file collections in this dataset. Can be aggregated from file_collections[].file_count.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/totalFileCount" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/totalFileCount", + "alias": "total_file_count", + "owner": "Dataset", + "domain_of": [ + "Dataset" + ], + "range": "integer", + "@type": "SlotDefinition" + }, + { + "name": "dataset__total_size_bytes", + "description": "Total size of all files in bytes across all file collections. 
Can be aggregated from file_collections[].total_bytes.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "http://www.w3.org/ns/dcat#byteSize" + ], + "slot_uri": "http://www.w3.org/ns/dcat#byteSize", + "alias": "total_size_bytes", + "owner": "Dataset", + "domain_of": [ + "Dataset" + ], + "range": "integer", + "@type": "SlotDefinition" + }, { "name": "dataset__purposes", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/purposes" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/purposes", "alias": "purposes", "owner": "Dataset", @@ -1955,6 +2140,9 @@ { "name": "dataset__tasks", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/tasks" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/tasks", "alias": "tasks", "owner": "Dataset", @@ -1970,7 +2158,10 @@ { "name": "dataset__addressing_gaps", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/addressing_gaps", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/addressingGaps" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/addressingGaps", "alias": "addressing_gaps", "owner": "Dataset", "domain_of": [ @@ -1985,7 +2176,10 @@ { "name": "dataset__creators", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/creators", + "mappings": [ + "http://schema.org/creator" + ], + "slot_uri": "http://schema.org/creator", "alias": "creators", "owner": "Dataset", "domain_of": [ @@ -2000,7 +2194,10 @@ { "name": "dataset__funders", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/funders", + "mappings": [ + "http://schema.org/funder" + ], + "slot_uri": 
"http://schema.org/funder", "alias": "funders", "owner": "Dataset", "domain_of": [ @@ -2036,6 +2233,9 @@ { "name": "dataset__instances", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/instances" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/instances", "alias": "instances", "owner": "Dataset", @@ -2051,6 +2251,9 @@ { "name": "dataset__anomalies", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/anomalies" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/anomalies", "alias": "anomalies", "owner": "Dataset", @@ -2067,6 +2270,9 @@ "name": "dataset__known_biases", "description": "Known biases present in the dataset that may affect fairness, representativeness, or model performance. Uses BiasTypeEnum for standardized bias categorization mapped to the AI Ontology (AIO).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/known_biases" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/known_biases", "alias": "known_biases", "owner": "Dataset", @@ -2083,6 +2289,9 @@ "name": "dataset__known_limitations", "description": "Known limitations of the dataset that may affect its use or interpretation. 
Distinct from biases (systematic errors) and anomalies (data quality issues).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/known_limitations" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/known_limitations", "alias": "known_limitations", "owner": "Dataset", @@ -2098,7 +2307,10 @@ { "name": "dataset__confidential_elements", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/confidential_elements", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/confidentialElements" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/confidentialElements", "alias": "confidential_elements", "owner": "Dataset", "domain_of": [ @@ -2113,7 +2325,10 @@ { "name": "dataset__content_warnings", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/content_warnings", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/contentWarnings" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/contentWarnings", "alias": "content_warnings", "owner": "Dataset", "domain_of": [ @@ -2128,6 +2343,9 @@ { "name": "dataset__subpopulations", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/subpopulations" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/subpopulations", "alias": "subpopulations", "owner": "Dataset", @@ -2143,7 +2361,10 @@ { "name": "dataset__sensitive_elements", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/sensitive_elements", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/sensitiveElements" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/sensitiveElements", "alias": 
"sensitive_elements", "owner": "Dataset", "domain_of": [ @@ -2158,7 +2379,10 @@ { "name": "dataset__acquisition_methods", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/acquisition_methods", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/acquisitionMethods" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/acquisitionMethods", "alias": "acquisition_methods", "owner": "Dataset", "domain_of": [ @@ -2173,7 +2397,10 @@ { "name": "dataset__collection_mechanisms", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection_mechanisms", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/collectionMechanisms" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collectionMechanisms", "alias": "collection_mechanisms", "owner": "Dataset", "domain_of": [ @@ -2188,7 +2415,10 @@ { "name": "dataset__sampling_strategies", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/sampling_strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/samplingStrategies" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/samplingStrategies", "alias": "sampling_strategies", "owner": "Dataset", "domain_of": [ @@ -2203,7 +2433,10 @@ { "name": "dataset__data_collectors", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data_collectors", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/dataCollectors" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dataCollectors", "alias": "data_collectors", "owner": "Dataset", "domain_of": [ @@ -2218,7 +2451,10 @@ { "name": "dataset__collection_timeframes", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/collection_timeframes", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/collectionTimeframes" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collectionTimeframes", "alias": "collection_timeframes", "owner": "Dataset", "domain_of": [ @@ -2234,7 +2470,10 @@ "name": "dataset__missing_data_documentation", "description": "Documentation of missing data patterns and handling strategies.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/missing_data_documentation", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/missingDataDocumentation" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/missingDataDocumentation", "alias": "missing_data_documentation", "owner": "Dataset", "domain_of": [ @@ -2250,7 +2489,10 @@ "name": "dataset__raw_data_sources", "description": "Description of raw data sources before preprocessing.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/raw_data_sources", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/rawDataSources" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/rawDataSources", "alias": "raw_data_sources", "owner": "Dataset", "domain_of": [ @@ -2265,7 +2507,10 @@ { "name": "dataset__ethical_reviews", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/ethical_reviews", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/ethicalReviews" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/ethicalReviews", "alias": "ethical_reviews", "owner": "Dataset", "domain_of": [ @@ -2280,7 +2525,10 @@ { "name": "dataset__data_protection_impacts", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/data_protection_impacts", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/dataProtectionImpacts" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dataProtectionImpacts", "alias": "data_protection_impacts", "owner": "Dataset", "domain_of": [ @@ -2296,7 +2544,10 @@ "name": "dataset__human_subject_research", "description": "Information about whether dataset involves human subjects research, including IRB approval, ethics review, and regulatory compliance.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human_subject_research", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/humanSubjectResearch" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/humanSubjectResearch", "alias": "human_subject_research", "owner": "Dataset", "domain_of": [ @@ -2310,7 +2561,10 @@ "name": "dataset__informed_consent", "description": "Details about informed consent procedures, including consent type, documentation, and withdrawal mechanisms.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/informed_consent", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/informedConsent" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/informedConsent", "alias": "informed_consent", "owner": "Dataset", "domain_of": [ @@ -2323,23 +2577,67 @@ "@type": "SlotDefinition" }, { - "name": "dataset__vulnerable_populations", + "name": "dataset__at_risk_populations", "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/vulnerable_populations", - "alias": "vulnerable_populations", + 
"mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/atRiskPopulations" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/atRiskPopulations", + "alias": "at_risk_populations", + "owner": "Dataset", + "domain_of": [ + "Dataset" + ], + "range": "AtRiskPopulations", + "inlined": true, + "@type": "SlotDefinition" + }, + { + "name": "dataset__participant_privacy", + "description": "Information about privacy protections and anonymization procedures for human research participants.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/participantPrivacy" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/participantPrivacy", + "alias": "participant_privacy", + "owner": "Dataset", + "domain_of": [ + "Dataset" + ], + "range": "ParticipantPrivacy", + "multivalued": true, + "inlined": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, + { + "name": "dataset__participant_compensation", + "description": "Information about compensation or incentives provided to human research participants.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/participantCompensation" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/participantCompensation", + "alias": "participant_compensation", "owner": "Dataset", "domain_of": [ "Dataset" ], - "range": "VulnerablePopulations", + "range": "HumanSubjectCompensation", + "multivalued": true, "inlined": true, + "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "dataset__preprocessing_strategies", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing_strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/preprocessingStrategies" + ], + "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/preprocessingStrategies", "alias": "preprocessing_strategies", "owner": "Dataset", "domain_of": [ @@ -2354,7 +2652,10 @@ { "name": "dataset__cleaning_strategies", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/cleaning_strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/cleaningStrategies" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/cleaningStrategies", "alias": "cleaning_strategies", "owner": "Dataset", "domain_of": [ @@ -2369,7 +2670,10 @@ { "name": "dataset__labeling_strategies", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/labeling_strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/labelingStrategies" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/labelingStrategies", "alias": "labeling_strategies", "owner": "Dataset", "domain_of": [ @@ -2384,7 +2688,10 @@ { "name": "dataset__raw_sources", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/raw_sources", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/rawSources" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/rawSources", "alias": "raw_sources", "owner": "Dataset", "domain_of": [ @@ -2400,6 +2707,9 @@ "name": "dataset__imputation_protocols", "description": "Data imputation methodology and techniques.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/imputation_protocols" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/imputation_protocols", "alias": "imputation_protocols", "owner": "Dataset", @@ -2416,6 +2726,9 @@ "name": "dataset__annotation_analyses", "description": "Analysis of annotation quality and 
inter-annotator agreement.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/annotation_analyses" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/annotation_analyses", "alias": "annotation_analyses", "owner": "Dataset", @@ -2447,7 +2760,10 @@ { "name": "dataset__existing_uses", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/existing_uses", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/existingUses" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/existingUses", "alias": "existing_uses", "owner": "Dataset", "domain_of": [ @@ -2462,7 +2778,10 @@ { "name": "dataset__use_repository", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/use_repository", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/useRepository" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/useRepository", "alias": "use_repository", "owner": "Dataset", "domain_of": [ @@ -2477,7 +2796,10 @@ { "name": "dataset__other_tasks", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/other_tasks", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/otherTasks" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/otherTasks", "alias": "other_tasks", "owner": "Dataset", "domain_of": [ @@ -2492,7 +2814,10 @@ { "name": "dataset__future_use_impacts", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/future_use_impacts", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/futureUseImpacts" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/futureUseImpacts", "alias": "future_use_impacts", "owner": 
"Dataset", "domain_of": [ @@ -2507,7 +2832,10 @@ { "name": "dataset__discouraged_uses", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/discouraged_uses", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/discouragedUses" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/discouragedUses", "alias": "discouraged_uses", "owner": "Dataset", "domain_of": [ @@ -2523,7 +2851,10 @@ "name": "dataset__intended_uses", "description": "Explicit intended and recommended uses for this dataset. Complements future_use_impacts by focusing on positive applications.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/intended_uses", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/intendedUses" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/intendedUses", "alias": "intended_uses", "owner": "Dataset", "domain_of": [ @@ -2539,7 +2870,10 @@ "name": "dataset__prohibited_uses", "description": "Explicitly prohibited or forbidden uses for this dataset. 
Stronger than discouraged_uses - these are not permitted.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/prohibited_uses", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/prohibitedUses" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/prohibitedUses", "alias": "prohibited_uses", "owner": "Dataset", "domain_of": [ @@ -2554,7 +2888,10 @@ { "name": "dataset__distribution_formats", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/distribution_formats", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/distributionFormats" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/distributionFormats", "alias": "distribution_formats", "owner": "Dataset", "domain_of": [ @@ -2569,7 +2906,10 @@ { "name": "dataset__distribution_dates", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/distribution_dates", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/distributionDates" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/distributionDates", "alias": "distribution_dates", "owner": "Dataset", "domain_of": [ @@ -2584,7 +2924,10 @@ { "name": "dataset__license_and_use_terms", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/license_and_use_terms", + "mappings": [ + "http://schema.org/license" + ], + "slot_uri": "http://schema.org/license", "alias": "license_and_use_terms", "owner": "Dataset", "domain_of": [ @@ -2597,7 +2940,10 @@ { "name": "dataset__ip_restrictions", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/ip_restrictions", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/ipRestrictions" + 
], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/ipRestrictions", "alias": "ip_restrictions", "owner": "Dataset", "domain_of": [ @@ -2610,7 +2956,10 @@ { "name": "dataset__regulatory_restrictions", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/regulatory_restrictions", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/regulatoryRestrictions" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/regulatoryRestrictions", "alias": "regulatory_restrictions", "owner": "Dataset", "domain_of": [ @@ -2623,6 +2972,9 @@ { "name": "dataset__maintainers", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/maintainers" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintainers", "alias": "maintainers", "owner": "Dataset", @@ -2638,6 +2990,9 @@ { "name": "dataset__errata", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/errata" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/errata", "alias": "errata", "owner": "Dataset", @@ -2653,6 +3008,9 @@ { "name": "dataset__updates", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/updates" + ], "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/updates", "alias": "updates", "owner": "Dataset", @@ -2666,7 +3024,10 @@ { "name": "dataset__retention_limit", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/retention_limit", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/retentionLimit" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/retentionLimit", "alias": "retention_limit", "owner": "Dataset", "domain_of": [ @@ -2679,7 +3040,10 @@ { 
"name": "dataset__version_access", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/version_access", + "mappings": [ + "http://www.w3.org/ns/dcat#accessURL" + ], + "slot_uri": "http://www.w3.org/ns/dcat#accessURL", "alias": "version_access", "owner": "Dataset", "domain_of": [ @@ -2692,7 +3056,10 @@ { "name": "dataset__extension_mechanism", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/extension_mechanism", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/extensionMechanism" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/extensionMechanism", "alias": "extension_mechanism", "owner": "Dataset", "domain_of": [ @@ -2727,7 +3094,10 @@ { "name": "dataset__is_deidentified", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/is_deidentified", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isDeidentified" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isDeidentified", "alias": "is_deidentified", "owner": "Dataset", "domain_of": [ @@ -2740,9 +3110,12 @@ { "name": "dataset__is_tabular", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/is_tabular", - "alias": "is_tabular", - "owner": "Dataset", + "mappings": [ + "http://schema.org/encodingFormat" + ], + "slot_uri": "http://schema.org/encodingFormat", + "alias": "is_tabular", + "owner": "Dataset", "domain_of": [ "Dataset" ], @@ -2794,7 +3167,10 @@ "name": "dataset__related_datasets", "description": "Related datasets with typed relationships (e.g., supplements, derives from, is version of). 
Use DatasetRelationship class to specify relationship types.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/related_datasets", + "mappings": [ + "http://schema.org/isRelatedTo" + ], + "slot_uri": "http://schema.org/isRelatedTo", "alias": "related_datasets", "owner": "Dataset", "domain_of": [ @@ -2934,7 +3310,10 @@ "name": "datasetProperty__used_software", "description": "What software was used as part of this dataset property?", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/used_software", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/usedSoftware" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/usedSoftware", "alias": "used_software", "owner": "DatasetProperty", "domain_of": [ @@ -3194,7 +3573,10 @@ "name": "creator__credit_roles", "description": "Contributor roles using the CRediT (Contributor Roles Taxonomy) for the principal investigator or creator team. Specifies the specific contributions made to this dataset (e.g., Conceptualization, Data Curation, Methodology). 
Note: roles are specified here rather than on Person directly, since the same person may have different roles across different datasets.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/motivation", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/motivation#credit_roles", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/creditRoles" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/creditRoles", "alias": "credit_roles", "owner": "Creator", "domain_of": [ @@ -3329,7 +3711,10 @@ "name": "instance__label", "description": "Is there a label or target associated with each instance?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#label", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/hasLabel" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/hasLabel", "alias": "label", "owner": "Instance", "domain_of": [ @@ -3358,7 +3743,10 @@ "name": "instance__sampling_strategies", "description": "References to one or more SamplingStrategy objects.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#sampling_strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/samplingStrategies" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/samplingStrategies", "alias": "sampling_strategies", "owner": "Instance", "domain_of": [ @@ -3373,7 +3761,10 @@ "name": "instance__missing_information", "description": "References to one or more MissingInfo objects describing missing data.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#missing_information", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/missingInformation" + ], + "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/missingInformation", "alias": "missing_information", "owner": "Instance", "domain_of": [ @@ -3388,7 +3779,10 @@ "name": "samplingStrategy__is_sample", "description": "Indicates whether it is a sample of a larger set.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#is_sample", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isSample" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isSample", "alias": "is_sample", "owner": "SamplingStrategy", "domain_of": [ @@ -3402,7 +3796,10 @@ "name": "samplingStrategy__is_random", "description": "Indicates whether the sample is random.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#is_random", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isRandom" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isRandom", "alias": "is_random", "owner": "SamplingStrategy", "domain_of": [ @@ -3416,7 +3813,10 @@ "name": "samplingStrategy__source_data", "description": "Description of the larger set from which the sample was drawn, if any.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#source_data", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/sourceData" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/sourceData", "alias": "source_data", "owner": "SamplingStrategy", "domain_of": [ @@ -3430,7 +3830,10 @@ "name": "samplingStrategy__is_representative", "description": "Indicates whether the sample is representative of the larger set.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/composition#is_representative", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isRepresentative" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isRepresentative", "alias": "is_representative", "owner": "SamplingStrategy", "domain_of": [ @@ -3444,7 +3847,10 @@ "name": "samplingStrategy__representative_verification", "description": "Explanation of how representativeness was validated or verified.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#representative_verification", + "mappings": [ + "http://schema.org/description" + ], + "slot_uri": "http://schema.org/description", "alias": "representative_verification", "owner": "SamplingStrategy", "domain_of": [ @@ -3458,7 +3864,10 @@ "name": "samplingStrategy__why_not_representative", "description": "Explanation of why the sample is not representative, if applicable.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#why_not_representative", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/whyNotRepresentative" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/whyNotRepresentative", "alias": "why_not_representative", "owner": "SamplingStrategy", "domain_of": [ @@ -3472,7 +3881,10 @@ "name": "samplingStrategy__strategies", "description": "Description of the sampling strategy (deterministic, probabilistic, etc.).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#strategies", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/strategies" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/strategies", "alias": "strategies", "owner": "SamplingStrategy", "domain_of": [ @@ -3571,7 
+3983,10 @@ "name": "datasetBias__bias_type", "description": "The type of bias identified, using standardized categories from the Artificial Intelligence Ontology (AIO).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#bias_type", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/biasType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/biasType", "alias": "bias_type", "owner": "DatasetBias", "domain_of": [ @@ -3600,7 +4015,10 @@ "name": "datasetBias__mitigation_strategy", "description": "Steps taken or recommended to mitigate this bias.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#mitigation_strategy", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/mitigation_strategy" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/mitigation_strategy", "alias": "mitigation_strategy", "owner": "DatasetBias", "domain_of": [ @@ -3613,7 +4031,10 @@ "name": "datasetBias__affected_subsets", "description": "Specific subsets or features of the dataset affected by this bias.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#affected_subsets", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/affectedSubsets" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/affectedSubsets", "alias": "affected_subsets", "owner": "DatasetBias", "domain_of": [ @@ -3627,7 +4048,10 @@ "name": "datasetLimitation__limitation_type", "description": "Category of limitation (e.g., scope, coverage, temporal, methodological).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#limitation_type", + "mappings": [ 
+ "https://w3id.org/bridge2ai/data-sheets-schema/limitationType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/limitationType", "alias": "limitation_type", "owner": "DatasetLimitation", "domain_of": [ @@ -3656,7 +4080,10 @@ "name": "datasetLimitation__scope_impact", "description": "How this limitation affects the scope or applicability of the dataset.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#scope_impact", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/scopeImpact" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/scopeImpact", "alias": "scope_impact", "owner": "DatasetLimitation", "domain_of": [ @@ -3669,7 +4096,10 @@ "name": "datasetLimitation__recommended_mitigation", "description": "Recommended approaches for users to address this limitation.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#recommended_mitigation", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/recommendedMitigation" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/recommendedMitigation", "alias": "recommended_mitigation", "owner": "DatasetLimitation", "domain_of": [ @@ -3699,7 +4129,10 @@ "name": "externalResource__archival", "description": "Indication whether official archival versions of external resources are included.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#archival", + "mappings": [ + "http://schema.org/archivedAt" + ], + "slot_uri": "http://schema.org/archivedAt", "alias": "archival", "owner": "ExternalResource", "domain_of": [ @@ -3730,7 +4163,10 @@ "name": "confidentiality__confidential_elements_present", "description": "Indicates whether any confidential data elements are 
present.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#confidential_elements_present", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/confidential_elements_present" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/confidential_elements_present", "alias": "confidential_elements_present", "owner": "Confidentiality", "domain_of": [ @@ -3760,7 +4196,10 @@ "name": "contentWarning__content_warnings_present", "description": "Indicates whether any content warnings are needed.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#content_warnings_present", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/content_warnings_present" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/content_warnings_present", "alias": "content_warnings_present", "owner": "ContentWarning", "domain_of": [ @@ -3789,7 +4228,10 @@ "name": "subpopulation__subpopulation_elements_present", "description": "Indicates whether any subpopulations are explicitly identified.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#subpopulation_elements_present", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/subpopulationElementsPresent" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/subpopulationElementsPresent", "alias": "subpopulation_elements_present", "owner": "Subpopulation", "domain_of": [ @@ -3834,7 +4276,10 @@ "name": "deidentification__identifiable_elements_present", "description": "Indicates whether data subjects can be identified.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/composition#identifiable_elements_present", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/identifiableElementsPresent" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/identifiableElementsPresent", "alias": "identifiable_elements_present", "owner": "Deidentification", "domain_of": [ @@ -3860,7 +4305,10 @@ "name": "deidentification__identifiers_removed", "description": "List of identifier types removed during de-identification.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#identifiers_removed", + "mappings": [ + "http://schema.org/identifier" + ], + "slot_uri": "http://schema.org/identifier", "alias": "identifiers_removed", "owner": "Deidentification", "domain_of": [ @@ -3891,7 +4339,10 @@ "name": "sensitiveElement__sensitive_elements_present", "description": "Indicates whether sensitive data elements are present.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#sensitive_elements_present", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/sensitive_elements_present" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/sensitive_elements_present", "alias": "sensitive_elements_present", "owner": "SensitiveElement", "domain_of": [ @@ -3921,7 +4372,10 @@ "name": "datasetRelationship__target_dataset", "description": "The dataset that this relationship points to. 
Can be specified by identifier, URL, or Dataset object.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#target_dataset", + "mappings": [ + "http://schema.org/identifier" + ], + "slot_uri": "http://schema.org/identifier", "alias": "target_dataset", "owner": "DatasetRelationship", "domain_of": [ @@ -3935,7 +4389,10 @@ "name": "datasetRelationship__relationship_type", "description": "The type of relationship (e.g., derives_from, supplements, is_version_of). Uses DatasetRelationshipTypeEnum for standardized relationship types.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/composition", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/composition#relationship_type", + "mappings": [ + "http://schema.org/additionalType" + ], + "slot_uri": "http://schema.org/additionalType", "alias": "relationship_type", "owner": "DatasetRelationship", "domain_of": [ @@ -3962,7 +4419,10 @@ "name": "instanceAcquisition__was_directly_observed", "description": "Whether the data was directly observed", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/was_directly_observed", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/wasDirectlyObserved" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/wasDirectlyObserved", "alias": "was_directly_observed", "owner": "InstanceAcquisition", "domain_of": [ @@ -3975,7 +4435,10 @@ "name": "instanceAcquisition__was_reported_by_subjects", "description": "Whether the data was reported directly by the subjects themselves", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/was_reported_by_subjects", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/wasReportedBySubjects" + ], + "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/wasReportedBySubjects", "alias": "was_reported_by_subjects", "owner": "InstanceAcquisition", "domain_of": [ @@ -3988,7 +4451,10 @@ "name": "instanceAcquisition__was_inferred_derived", "description": "Whether the data was inferred or derived from other data", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/was_inferred_derived", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/wasInferred" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/wasInferred", "alias": "was_inferred_derived", "owner": "InstanceAcquisition", "domain_of": [ @@ -4001,7 +4467,10 @@ "name": "instanceAcquisition__was_validated_verified", "description": "Whether the data was validated or verified in any way", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/was_validated_verified", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/wasValidated" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/wasValidated", "alias": "was_validated_verified", "owner": "InstanceAcquisition", "domain_of": [ @@ -4048,7 +4517,10 @@ "name": "dataCollector__role", "description": "Role of the data collector (e.g., researcher, crowdworker)", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/role", + "mappings": [ + "http://schema.org/roleName" + ], + "slot_uri": "http://schema.org/roleName", "alias": "role", "owner": "DataCollector", "domain_of": [ @@ -4078,7 +4550,10 @@ "name": "collectionTimeframe__start_date", "description": "Start date of data collection", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/start_date", + 
"mappings": [ + "http://schema.org/startDate" + ], + "slot_uri": "http://schema.org/startDate", "alias": "start_date", "owner": "CollectionTimeframe", "domain_of": [ @@ -4091,7 +4566,10 @@ "name": "collectionTimeframe__end_date", "description": "End date of data collection", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/end_date", + "mappings": [ + "http://schema.org/endDate" + ], + "slot_uri": "http://schema.org/endDate", "alias": "end_date", "owner": "CollectionTimeframe", "domain_of": [ @@ -4121,7 +4599,10 @@ "name": "directCollection__is_direct", "description": "Whether collection was direct from individuals", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/is_direct", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isDirect" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isDirect", "alias": "is_direct", "owner": "DirectCollection", "domain_of": [ @@ -4151,7 +4632,10 @@ "name": "missingDataDocumentation__missing_data_patterns", "description": "Description of patterns in missing data (e.g., missing completely at random, missing at random, missing not at random).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/missing_data_patterns", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/missingDataPatterns" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/missingDataPatterns", "alias": "missing_data_patterns", "owner": "MissingDataDocumentation", "domain_of": [ @@ -4165,7 +4649,10 @@ "name": "missingDataDocumentation__missing_data_causes", "description": "Known or suspected causes of missing data (e.g., sensor failures, participant dropout, privacy constraints).\n", "from_schema": 
"https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/missing_data_causes", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/missingDataCauses" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/missingDataCauses", "alias": "missing_data_causes", "owner": "MissingDataDocumentation", "domain_of": [ @@ -4179,7 +4666,10 @@ "name": "missingDataDocumentation__handling_strategy", "description": "Strategy used to handle missing data (e.g., deletion, imputation, flagging, multiple imputation).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/handling_strategy", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/handlingStrategy" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/handlingStrategy", "alias": "handling_strategy", "owner": "MissingDataDocumentation", "domain_of": [ @@ -4192,7 +4682,10 @@ "name": "rawDataSource__source_description", "description": "Detailed description of where raw data comes from (e.g., sensors, databases, web APIs, manual collection).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/source_description", + "mappings": [ + "http://purl.org/dc/terms/description" + ], + "slot_uri": "http://purl.org/dc/terms/description", "alias": "source_description", "owner": "RawDataSource", "domain_of": [ @@ -4206,7 +4699,10 @@ "name": "rawDataSource__source_type", "description": "Type of raw source (sensor, database, user input, web scraping, etc.).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/source_type", + "mappings": [ + "http://purl.org/dc/terms/type" + ], + "slot_uri": "http://purl.org/dc/terms/type", "alias": 
"source_type", "owner": "RawDataSource", "domain_of": [ @@ -4220,7 +4716,10 @@ "name": "rawDataSource__access_details", "description": "Information on how to access or retrieve the raw source data.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/access_details", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/accessDetails" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/accessDetails", "alias": "access_details", "owner": "RawDataSource", "domain_of": [ @@ -4233,7 +4732,10 @@ "name": "rawDataSource__raw_data_format", "description": "Format of the raw data before any preprocessing.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/collection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collection/raw_data_format", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/rawDataFormat" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/rawDataFormat", "alias": "raw_data_format", "owner": "RawDataSource", "domain_of": [ @@ -4300,10 +4802,13 @@ "name": "labelingStrategy__data_annotation_protocol", "description": "Annotation methodology, tasks, and protocols followed during labeling. 
Includes annotation guidelines, quality control procedures, and task definitions.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/dataAnnotationProtocol" + ], "exact_mappings": [ "http://mlcommons.org/croissant/RAI/dataAnnotationProtocol" ], - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#data_annotation_protocol", + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dataAnnotationProtocol", "alias": "data_annotation_protocol", "owner": "LabelingStrategy", "domain_of": [ @@ -4317,10 +4822,13 @@ "name": "labelingStrategy__annotations_per_item", "description": "Number of annotations collected per data item. Multiple annotations per item enable calculation of inter-annotator agreement.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/annotationsPerItem" + ], "exact_mappings": [ "http://mlcommons.org/croissant/RAI/annotationsPerItem" ], - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#annotations_per_item", + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/annotationsPerItem", "alias": "annotations_per_item", "owner": "LabelingStrategy", "domain_of": [ @@ -4349,10 +4857,13 @@ "name": "labelingStrategy__annotator_demographics", "description": "Demographic information about annotators, if available and relevant (e.g., geographic location, language background, expertise level).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/annotatorDemographics" + ], "exact_mappings": [ "http://mlcommons.org/croissant/RAI/annotatorDemographics" ], - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#annotator_demographics", + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/annotatorDemographics", "alias": "annotator_demographics", "owner": "LabelingStrategy", "domain_of": [ @@ -4383,7 +4894,10 @@ "name": "rawData__access_url", "description": "URL or access point for the raw data.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#access_url", + "mappings": [ + "http://www.w3.org/ns/dcat#accessURL" + ], + "slot_uri": "http://www.w3.org/ns/dcat#accessURL", "alias": "access_url", "owner": "RawData", "domain_of": [ @@ -4413,7 +4927,10 @@ "name": "imputationProtocol__imputation_method", "description": "Specific imputation technique used (mean, median, mode, forward fill, backward fill, interpolation, model-based imputation, etc.).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#imputation_method", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/imputation_method" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/imputation_method", "alias": "imputation_method", "owner": "ImputationProtocol", "domain_of": [ @@ -4427,7 +4944,10 @@ "name": "imputationProtocol__imputed_fields", "description": "Fields or columns where imputation was applied.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#imputed_fields", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/imputed_fields" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/imputed_fields", "alias": "imputed_fields", "owner": "ImputationProtocol", 
"domain_of": [ @@ -4441,7 +4961,10 @@ "name": "imputationProtocol__imputation_rationale", "description": "Justification for the imputation approach chosen, including assumptions made about missing data mechanisms.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#imputation_rationale", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/imputation_rationale" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/imputation_rationale", "alias": "imputation_rationale", "owner": "ImputationProtocol", "domain_of": [ @@ -4454,7 +4977,10 @@ "name": "imputationProtocol__imputation_validation", "description": "Methods used to validate imputation quality (if any).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#imputation_validation", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/imputation_validation" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/imputation_validation", "alias": "imputation_validation", "owner": "ImputationProtocol", "domain_of": [ @@ -4468,7 +4994,10 @@ "name": "annotationAnalysis__inter_annotator_agreement_score", "description": "Measured agreement between annotators (e.g., Cohen's kappa value, Fleiss' kappa, Krippendorff's alpha).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#inter_annotator_agreement_score", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/interAnnotatorAgreementScore" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/interAnnotatorAgreementScore", "alias": "inter_annotator_agreement_score", "owner": 
"AnnotationAnalysis", "domain_of": [ @@ -4481,7 +5010,10 @@ "name": "annotationAnalysis__agreement_metric", "description": "Type of agreement metric used (Cohen's kappa, Fleiss' kappa, Krippendorff's alpha, percentage agreement, etc.).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#agreement_metric", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/agreementMetric" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/agreementMetric", "alias": "agreement_metric", "owner": "AnnotationAnalysis", "domain_of": [ @@ -4494,7 +5026,10 @@ "name": "annotationAnalysis__analysis_method", "description": "Methodology used to assess annotation quality and resolve disagreements.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#analysis_method", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/analysisMethod" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/analysisMethod", "alias": "analysis_method", "owner": "AnnotationAnalysis", "domain_of": [ @@ -4507,7 +5042,10 @@ "name": "annotationAnalysis__disagreement_patterns", "description": "Systematic patterns in annotator disagreements (e.g., by demographic group, annotation difficulty, task type).\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#disagreement_patterns", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/disagreementPatterns" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/disagreementPatterns", "alias": "disagreement_patterns", "owner": "AnnotationAnalysis", "domain_of": [ @@ -4521,7 +5059,10 @@ 
"name": "annotationAnalysis__annotation_quality_details", "description": "Additional details on annotation quality assessment and findings.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#annotation_quality_details", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/annotationQualityDetails" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/annotationQualityDetails", "alias": "annotation_quality_details", "owner": "AnnotationAnalysis", "domain_of": [ @@ -4535,7 +5076,10 @@ "name": "machineAnnotationTools__tools", "description": "List of automated annotation tools with their versions. Format each entry as \"ToolName version\" (e.g., \"spaCy 3.5.0\", \"NLTK 3.8\", \"GPT-4 turbo\"). Use \"unknown\" for version if not available (e.g., \"Custom NER Model unknown\").\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#tools", + "mappings": [ + "http://schema.org/name" + ], + "slot_uri": "http://schema.org/name", "alias": "tools", "owner": "MachineAnnotationTools", "domain_of": [ @@ -4549,7 +5093,10 @@ "name": "machineAnnotationTools__tool_descriptions", "description": "Descriptions of what each tool does in the annotation process and what types of annotations it produces. 
Should correspond to the tools list.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#tool_descriptions", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/toolDescriptions" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/toolDescriptions", "alias": "tool_descriptions", "owner": "MachineAnnotationTools", "domain_of": [ @@ -4563,7 +5110,10 @@ "name": "machineAnnotationTools__tool_accuracy", "description": "Known accuracy or performance metrics for the automated tools (if available). Include metric name and value (e.g., \"spaCy F1: 0.95\", \"GPT-4 Accuracy: 92%\").\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#tool_accuracy", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/toolAccuracy" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/toolAccuracy", "alias": "tool_accuracy", "owner": "MachineAnnotationTools", "domain_of": [ @@ -4577,7 +5127,10 @@ "name": "existingUse__examples", "description": "List of examples of known/previous uses of the dataset.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/uses", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/uses#examples", + "mappings": [ + "http://schema.org/example" + ], + "slot_uri": "http://schema.org/example", "alias": "examples", "owner": "ExistingUse", "domain_of": [ @@ -4699,7 +5252,10 @@ "name": "intendedUse__use_category", "description": "Category of intended use (e.g., research, clinical, educational, commercial, policy).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/uses", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/uses#use_category", + "mappings": [ + 
"https://w3id.org/bridge2ai/data-sheets-schema/useCategory" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/useCategory", "alias": "use_category", "owner": "IntendedUse", "domain_of": [ @@ -4713,7 +5269,10 @@ "name": "prohibitedUse__prohibition_reason", "description": "Reason why this use is prohibited (e.g., license restriction, ethical concern, privacy risk, legal constraint).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/uses", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/uses#prohibition_reason", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/prohibitionReason" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/prohibitionReason", "alias": "prohibition_reason", "owner": "ProhibitedUse", "domain_of": [ @@ -4810,7 +5369,10 @@ "name": "erratum__erratum_url", "description": "URL or access point for the erratum.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance#erratum_url", + "mappings": [ + "http://www.w3.org/ns/dcat#accessURL" + ], + "slot_uri": "http://www.w3.org/ns/dcat#accessURL", "alias": "erratum_url", "owner": "Erratum", "domain_of": [ @@ -4840,7 +5402,10 @@ "name": "updatePlan__frequency", "description": "How often updates are planned (e.g., quarterly, annually).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance#frequency", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/frequency" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/frequency", "alias": "frequency", "owner": "UpdatePlan", "domain_of": [ @@ -4870,7 +5435,10 @@ "name": "retentionLimits__retention_period", "description": "Time period for data retention.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": 
"https://w3id.org/bridge2ai/data-sheets-schema/maintenance#retention_period", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/retentionPeriod" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/retentionPeriod", "alias": "retention_period", "owner": "RetentionLimits", "domain_of": [ @@ -4900,7 +5468,10 @@ "name": "versionAccess__latest_version_doi", "description": "DOI or URL of the latest dataset version.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance#latest_version_doi", + "mappings": [ + "http://schema.org/identifier" + ], + "slot_uri": "http://schema.org/identifier", "alias": "latest_version_doi", "owner": "VersionAccess", "domain_of": [ @@ -4913,7 +5484,10 @@ "name": "versionAccess__versions_available", "description": "List of available versions with metadata.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance#versions_available", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/versionsAvailable" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/versionsAvailable", "alias": "versions_available", "owner": "VersionAccess", "domain_of": [ @@ -4944,7 +5518,10 @@ "name": "extensionMechanism__contribution_url", "description": "URL for contribution guidelines or process.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/maintenance#contribution_url", + "mappings": [ + "http://www.w3.org/ns/dcat#landingPage" + ], + "slot_uri": "http://www.w3.org/ns/dcat#landingPage", "alias": "contribution_url", "owner": "ExtensionMechanism", "domain_of": [ @@ -5097,7 +5674,10 @@ "name": "humanSubjectResearch__involves_human_subjects", "description": "Does this dataset involve human subjects research?", "from_schema": 
"https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#involves_human_subjects", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/involvesHumanSubjects" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/involvesHumanSubjects", "alias": "involves_human_subjects", "owner": "HumanSubjectResearch", "domain_of": [ @@ -5110,7 +5690,10 @@ "name": "humanSubjectResearch__irb_approval", "description": "Was Institutional Review Board (IRB) approval obtained? Include approval number and institution if applicable.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#irb_approval", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/irbApproval" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/irbApproval", "alias": "irb_approval", "owner": "HumanSubjectResearch", "domain_of": [ @@ -5124,7 +5707,10 @@ "name": "humanSubjectResearch__ethics_review_board", "description": "What ethics review board(s) reviewed this research? 
Include institution names and approval details.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#ethics_review_board", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/ethicsReviewBoard" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/ethicsReviewBoard", "alias": "ethics_review_board", "owner": "HumanSubjectResearch", "domain_of": [ @@ -5138,7 +5724,10 @@ "name": "humanSubjectResearch__special_populations", "description": "Does the research involve any special populations that require additional protections (e.g., minors, pregnant women, prisoners)?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#special_populations", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/specialPopulations" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/specialPopulations", "alias": "special_populations", "owner": "HumanSubjectResearch", "domain_of": [ @@ -5152,7 +5741,10 @@ "name": "humanSubjectResearch__regulatory_compliance", "description": "What regulatory frameworks govern this human subjects research (e.g., 45 CFR 46, HIPAA)?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#regulatory_compliance", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/regulatoryCompliance" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/regulatoryCompliance", "alias": "regulatory_compliance", "owner": "HumanSubjectResearch", "domain_of": [ @@ -5166,7 +5758,10 @@ "name": "informedConsent__consent_obtained", "description": "Was informed consent obtained from all participants?", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#consent_obtained", + 
"mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/consentObtained" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/consentObtained", "alias": "consent_obtained", "owner": "InformedConsent", "domain_of": [ @@ -5179,7 +5774,10 @@ "name": "informedConsent__consent_type", "description": "What type of consent was obtained (e.g., written, verbal, electronic, implied through participation)?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#consent_type", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/consentType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/consentType", "alias": "consent_type", "owner": "InformedConsent", "domain_of": [ @@ -5193,7 +5791,10 @@ "name": "informedConsent__consent_documentation", "description": "How is consent documented? Include references to consent forms or procedures used.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#consent_documentation", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/consentDocumentation" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/consentDocumentation", "alias": "consent_documentation", "owner": "InformedConsent", "domain_of": [ @@ -5207,7 +5808,10 @@ "name": "informedConsent__withdrawal_mechanism", "description": "How can participants withdraw their consent? 
What procedures are in place for data deletion upon withdrawal?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#withdrawal_mechanism", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/withdrawalMechanism" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/withdrawalMechanism", "alias": "withdrawal_mechanism", "owner": "InformedConsent", "domain_of": [ @@ -5221,7 +5825,10 @@ "name": "informedConsent__consent_scope", "description": "What specific uses did participants consent to? Are there limitations on data use based on consent?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#consent_scope", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/consentScope" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/consentScope", "alias": "consent_scope", "owner": "InformedConsent", "domain_of": [ @@ -5232,55 +5839,202 @@ "@type": "SlotDefinition" }, { - "name": "vulnerablePopulations__vulnerable_groups_included", + "name": "participantPrivacy__anonymization_method", + "description": "What methods were used to anonymize or de-identify participant data? Include technical details of privacy-preserving techniques.\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/anonymizationMethod" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/anonymizationMethod", + "alias": "anonymization_method", + "owner": "ParticipantPrivacy", + "domain_of": [ + "ParticipantPrivacy" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "participantPrivacy__reidentification_risk", + "description": "What is the assessed risk of re-identification? 
What measures were taken to minimize this risk?\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/reidentificationRisk" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/reidentificationRisk", + "alias": "reidentification_risk", + "owner": "ParticipantPrivacy", + "domain_of": [ + "ParticipantPrivacy" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "participantPrivacy__privacy_techniques", + "description": "What privacy-preserving techniques were applied (e.g., differential privacy, k-anonymity, data masking)?\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/privacyTechniques" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/privacyTechniques", + "alias": "privacy_techniques", + "owner": "ParticipantPrivacy", + "domain_of": [ + "ParticipantPrivacy" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "participantPrivacy__data_linkage", + "description": "Can this dataset be linked to other datasets in ways that might compromise participant privacy?\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/dataLinkage" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/dataLinkage", + "alias": "data_linkage", + "owner": "ParticipantPrivacy", + "domain_of": [ + "ParticipantPrivacy" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "humanSubjectCompensation__compensation_provided", + "description": "Were participants compensated for their participation?", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/compensationProvided" + ], + 
"slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/compensationProvided", + "alias": "compensation_provided", + "owner": "HumanSubjectCompensation", + "domain_of": [ + "HumanSubjectCompensation" + ], + "range": "boolean", + "@type": "SlotDefinition" + }, + { + "name": "humanSubjectCompensation__compensation_type", + "description": "What type of compensation was provided (e.g., monetary payment, gift cards, course credit, other incentives)?\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/compensationType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/compensationType", + "alias": "compensation_type", + "owner": "HumanSubjectCompensation", + "domain_of": [ + "HumanSubjectCompensation" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "humanSubjectCompensation__compensation_amount", + "description": "What was the amount or value of compensation provided? Include currency or equivalent value.\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/compensationAmount" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/compensationAmount", + "alias": "compensation_amount", + "owner": "HumanSubjectCompensation", + "domain_of": [ + "HumanSubjectCompensation" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "humanSubjectCompensation__compensation_rationale", + "description": "What was the rationale for the compensation structure? 
How was the amount determined to be appropriate?\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/compensationRationale" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/compensationRationale", + "alias": "compensation_rationale", + "owner": "HumanSubjectCompensation", + "domain_of": [ + "HumanSubjectCompensation" + ], + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "atRiskPopulations__at_risk_groups_included", "description": "Are any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#vulnerable_groups_included", - "alias": "vulnerable_groups_included", - "owner": "VulnerablePopulations", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/atRiskGroupsIncluded" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/atRiskGroupsIncluded", + "alias": "at_risk_groups_included", + "owner": "AtRiskPopulations", "domain_of": [ - "VulnerablePopulations" + "AtRiskPopulations" ], "range": "boolean", "@type": "SlotDefinition" }, { - "name": "vulnerablePopulations__special_protections", + "name": "atRiskPopulations__special_protections", "description": "What additional protections were implemented for at-risk populations? 
Include safeguards, modified procedures, or additional oversight.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#special_protections", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/specialProtections" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/specialProtections", "alias": "special_protections", - "owner": "VulnerablePopulations", + "owner": "AtRiskPopulations", "domain_of": [ - "VulnerablePopulations" + "AtRiskPopulations" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { - "name": "vulnerablePopulations__assent_procedures", + "name": "atRiskPopulations__assent_procedures", "description": "For research involving minors, what assent procedures were used? How was developmentally appropriate assent obtained?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#assent_procedures", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/assentProcedures" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/assentProcedures", "alias": "assent_procedures", - "owner": "VulnerablePopulations", + "owner": "AtRiskPopulations", "domain_of": [ - "VulnerablePopulations" + "AtRiskPopulations" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { - "name": "vulnerablePopulations__guardian_consent", + "name": "atRiskPopulations__guardian_consent", "description": "For participants unable to provide their own consent, how was guardian or surrogate consent obtained?\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#guardian_consent", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/guardianConsent" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/guardianConsent", "alias": 
"guardian_consent", - "owner": "VulnerablePopulations", + "owner": "AtRiskPopulations", "domain_of": [ - "VulnerablePopulations" + "AtRiskPopulations" ], "range": "string", "multivalued": true, @@ -5389,7 +6143,10 @@ "name": "exportControlRegulatoryRestrictions__hipaa_compliant", "description": "Indicates compliance with the Health Insurance Portability and Accountability Act (HIPAA). HIPAA applies to protected health information in the United States.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#hipaa_compliant", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/hipaaCompliant" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/hipaaCompliant", "alias": "hipaa_compliant", "owner": "ExportControlRegulatoryRestrictions", "domain_of": [ @@ -5402,7 +6159,10 @@ "name": "exportControlRegulatoryRestrictions__other_compliance", "description": "Other regulatory compliance frameworks applicable to this dataset (e.g., CCPA, PIPEDA, industry-specific regulations).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#other_compliance", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/otherCompliance" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/otherCompliance", "alias": "other_compliance", "owner": "ExportControlRegulatoryRestrictions", "domain_of": [ @@ -5416,7 +6176,10 @@ "name": "exportControlRegulatoryRestrictions__confidentiality_level", "description": "Confidentiality classification of the dataset indicating level of access restrictions and sensitivity.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/data-governance#confidentiality_level", + "mappings": [ + 
"https://w3id.org/bridge2ai/data-sheets-schema/confidentialityLevel" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/confidentialityLevel", "alias": "confidentiality_level", "owner": "ExportControlRegulatoryRestrictions", "domain_of": [ @@ -5507,7 +6270,10 @@ "name": "variableMetadata__missing_value_code", "description": "Code(s) used to represent missing values for this variable. Examples: \"NA\", \"-999\", \"null\", \"\". Multiple codes may be specified.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/variables", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#missing_value_code", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/missingValueCode" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/missingValueCode", "alias": "missing_value_code", "owner": "VariableMetadata", "domain_of": [ @@ -5603,7 +6369,10 @@ "name": "variableMetadata__is_sensitive", "description": "Indicates whether this variable contains sensitive information (e.g., personal data, protected health information).", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/variables", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#is_sensitive", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/isSensitive" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/isSensitive", "alias": "is_sensitive", "owner": "VariableMetadata", "domain_of": [ @@ -5616,7 +6385,10 @@ "name": "variableMetadata__precision", "description": "The precision or number of decimal places for numeric variables.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/variables", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#precision", + "mappings": [ + "http://schema.org/valuePrecision" + ], + "slot_uri": "http://schema.org/valuePrecision", "alias": "precision", "owner": "VariableMetadata", "domain_of": [ @@ -5645,7 +6417,10 @@ "name": 
"variableMetadata__derivation", "description": "Description of how this variable was derived or calculated from other variables, if applicable.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/variables", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#derivation", + "mappings": [ + "http://purl.org/dc/terms/provenance" + ], + "slot_uri": "http://purl.org/dc/terms/provenance", "alias": "derivation", "owner": "VariableMetadata", "domain_of": [ @@ -5658,7 +6433,10 @@ "name": "variableMetadata__quality_notes", "description": "Notes about data quality, reliability, or known issues specific to this variable.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/variables", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/variables#quality_notes", + "mappings": [ + "http://purl.org/dc/terms/description" + ], + "slot_uri": "http://purl.org/dc/terms/description", "alias": "quality_notes", "owner": "VariableMetadata", "domain_of": [ @@ -5668,14 +6446,82 @@ "multivalued": true, "@type": "SlotDefinition" }, + { + "name": "file__file_type", + "description": "Semantic type or purpose of this file (e.g., data_file, code_file, documentation_file, metadata_file).", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/fileType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/fileType", + "alias": "file_type", + "owner": "File", + "domain_of": [ + "File" + ], + "range": "FileTypeEnum", + "@type": "SlotDefinition" + }, + { + "name": "fileCollection__collection_type", + "description": "Type(s) of content in this file collection. 
A collection may have multiple types, for example a collection containing both raw_data and documentation files would have both types listed.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/collectionType" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/collectionType", + "alias": "collection_type", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "range": "FileCollectionTypeEnum", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "fileCollection__file_count", + "description": "Number of files in this collection.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "mappings": [ + "https://w3id.org/bridge2ai/data-sheets-schema/fileCount" + ], + "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/fileCount", + "alias": "file_count", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "range": "integer", + "@type": "SlotDefinition" + }, + { + "name": "fileCollection__total_bytes", + "description": "Total size of all files in bytes.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "mappings": [ + "http://www.w3.org/ns/dcat#byteSize" + ], + "slot_uri": "http://www.w3.org/ns/dcat#byteSize", + "alias": "total_bytes", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "range": "integer", + "@type": "SlotDefinition" + }, { "name": "DatasetCollection_resources", "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", - "description": "Sub-resources or component datasets. Used in DatasetCollection to contain Dataset objects, and in Dataset to allow nested resource structures.", + "description": "Sub-resources or component items. In DatasetCollection, contains Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, contains nested FileCollection objects. 
The specific range is defined via slot_usage in each class.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://schema.org/hasPart" + ], "is_a": "resources", "domain": "DatasetCollection", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", + "slot_uri": "http://schema.org/hasPart", "alias": "resources", "owner": "DatasetCollection", "domain_of": [ @@ -5692,7 +6538,7 @@ { "name": "Dataset_external_resources", "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/external_resources", - "description": "Links or identifiers for external resources. Can be used either as a list of ExternalResource objects (in Dataset) or as a list of URL strings (within ExternalResource class).", + "description": "External resources referenced at the dataset level (e.g., related publications, repositories, documentation). For file-level external resources, use FileCollection.external_resources.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", "mappings": [ "http://purl.org/dc/terms/references" @@ -5716,44 +6562,147 @@ { "name": "Dataset_resources", "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", - "description": "Sub-resources or component datasets that are part of this dataset. Allows datasets to contain nested resource structures.", + "description": "Sub-resources or component datasets that are part of this dataset. 
Note: For file collections, use the file_collections attribute instead.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://schema.org/hasPart" + ], "is_a": "resources", "domain": "Dataset", - "slot_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", + "slot_uri": "http://schema.org/hasPart", "alias": "resources", "owner": "Dataset", "domain_of": [ "Dataset" ], "is_usage_slot": true, - "usage_slot_name": "resources", - "range": "Dataset", + "usage_slot_name": "resources", + "range": "Dataset", + "multivalued": true, + "inlined": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, + { + "name": "ExternalResource_external_resources", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/external_resources", + "description": "List of links or identifiers for external resources.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://purl.org/dc/terms/references" + ], + "is_a": "external_resources", + "domain": "ExternalResource", + "slot_uri": "http://purl.org/dc/terms/references", + "alias": "external_resources", + "owner": "ExternalResource", + "domain_of": [ + "ExternalResource" + ], + "is_usage_slot": true, + "usage_slot_name": "external_resources", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "FileCollection_path", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/path", + "description": "Path or URL to the FileCollection. 
May be a directory path, archive file path, or download URL depending on how the collection is distributed.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://schema.org/contentUrl" + ], + "is_a": "path", + "domain": "FileCollection", + "slot_uri": "http://schema.org/contentUrl", + "alias": "path", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "is_usage_slot": true, + "usage_slot_name": "path", + "range": "string", + "@type": "SlotDefinition" + }, + { + "name": "FileCollection_compression", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/compression", + "description": "Compression format if the collection is packaged as a compressed archive (e.g., gzip, zip, bzip2). Omit this field for uncompressed collections or purely logical groupings.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://www.w3.org/ns/dcat#compressFormat" + ], + "is_a": "compression", + "domain": "FileCollection", + "slot_uri": "http://www.w3.org/ns/dcat#compressFormat", + "alias": "compression", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "is_usage_slot": true, + "usage_slot_name": "compression", + "range": "CompressionEnum", + "@type": "SlotDefinition" + }, + { + "name": "FileCollection_external_resources", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/external_resources", + "description": "External files or URLs referenced by this file collection.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", + "mappings": [ + "http://purl.org/dc/terms/references" + ], + "is_a": "external_resources", + "domain": "FileCollection", + "slot_uri": "http://purl.org/dc/terms/references", + "alias": "external_resources", + "owner": "FileCollection", + "domain_of": [ + "FileCollection" + ], + "is_usage_slot": true, + "usage_slot_name": "external_resources", + "range": "ExternalResource", "multivalued": 
true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { - "name": "ExternalResource_external_resources", - "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/external_resources", - "description": "List of links or identifiers for external resources.", + "name": "FileCollection_resources", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/resources", + "description": "Individual files or nested file collections within this collection. Allows hierarchical file organization with both File objects and nested FileCollection objects.", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/base", "mappings": [ - "http://purl.org/dc/terms/references" + "http://schema.org/hasPart" ], - "is_a": "external_resources", - "domain": "ExternalResource", - "slot_uri": "http://purl.org/dc/terms/references", - "alias": "external_resources", - "owner": "ExternalResource", + "is_a": "resources", + "domain": "FileCollection", + "slot_uri": "http://schema.org/hasPart", + "alias": "resources", + "owner": "FileCollection", "domain_of": [ - "ExternalResource" + "FileCollection" ], "is_usage_slot": true, - "usage_slot_name": "external_resources", - "range": "string", + "usage_slot_name": "resources", + "range": "Dataset", "multivalued": true, + "inlined": true, + "inlined_as_list": true, + "any_of": [ + { + "range": "File", + "@type": "AnonymousSlotExpression" + }, + { + "range": "FileCollection", + "@type": "AnonymousSlotExpression" + } + ], "@type": "SlotDefinition" } ], @@ -5847,17 +6796,11 @@ "title", "version", "was_derived_from", - "bytes", - "dialect", - "encoding", - "format", - "hash", - "md5", - "media_type", - "path", - "sha256", "Dataset_external_resources", "Dataset_resources", + "dataset__file_collections", + "dataset__total_file_count", + "dataset__total_size_bytes", "dataset__purposes", "dataset__tasks", "dataset__addressing_gaps", @@ -5883,7 +6826,9 @@ "dataset__data_protection_impacts", 
"dataset__human_subject_research", "dataset__informed_consent", - "dataset__vulnerable_populations", + "dataset__at_risk_populations", + "dataset__participant_privacy", + "dataset__participant_compensation", "dataset__preprocessing_strategies", "dataset__cleaning_strategies", "dataset__labeling_strategies", @@ -5918,8 +6863,35 @@ ], "slot_usage": {}, "attributes": [ + { + "name": "file_collections", + "description": "Collections of files within this dataset. Each collection represents a logical grouping of files with shared characteristics (e.g., all training data, all image files, all raw data files). Maps to nested RO-Crate Dataset entities via schema:hasPart.", + "exact_mappings": [ + "dcat:distribution" + ], + "slot_uri": "schema:hasPart", + "range": "FileCollection", + "multivalued": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, + { + "name": "total_file_count", + "description": "Total number of files across all file collections in this dataset. Can be aggregated from file_collections[].file_count.", + "slot_uri": "d4d:totalFileCount", + "range": "integer", + "@type": "SlotDefinition" + }, + { + "name": "total_size_bytes", + "description": "Total size of all files in bytes across all file collections. 
Can be aggregated from file_collections[].total_bytes.", + "slot_uri": "dcat:byteSize", + "range": "integer", + "@type": "SlotDefinition" + }, { "name": "purposes", + "slot_uri": "d4d:purposes", "range": "Purpose", "multivalued": true, "inlined_as_list": true, @@ -5927,6 +6899,7 @@ }, { "name": "tasks", + "slot_uri": "d4d:tasks", "range": "Task", "multivalued": true, "inlined_as_list": true, @@ -5934,6 +6907,7 @@ }, { "name": "addressing_gaps", + "slot_uri": "d4d:addressingGaps", "range": "AddressingGap", "multivalued": true, "inlined_as_list": true, @@ -5941,6 +6915,7 @@ }, { "name": "creators", + "slot_uri": "schema:creator", "range": "Creator", "multivalued": true, "inlined_as_list": true, @@ -5948,6 +6923,7 @@ }, { "name": "funders", + "slot_uri": "schema:funder", "range": "FundingMechanism", "multivalued": true, "inlined_as_list": true, @@ -5966,6 +6942,7 @@ }, { "name": "instances", + "slot_uri": "d4d:instances", "range": "Instance", "multivalued": true, "inlined_as_list": true, @@ -5973,6 +6950,7 @@ }, { "name": "anomalies", + "slot_uri": "d4d:anomalies", "range": "DataAnomaly", "multivalued": true, "inlined_as_list": true, @@ -5981,6 +6959,7 @@ { "name": "known_biases", "description": "Known biases present in the dataset that may affect fairness, representativeness, or model performance. Uses BiasTypeEnum for standardized bias categorization mapped to the AI Ontology (AIO).", + "slot_uri": "d4d:known_biases", "range": "DatasetBias", "multivalued": true, "inlined_as_list": true, @@ -5989,6 +6968,7 @@ { "name": "known_limitations", "description": "Known limitations of the dataset that may affect its use or interpretation. 
Distinct from biases (systematic errors) and anomalies (data quality issues).", + "slot_uri": "d4d:known_limitations", "range": "DatasetLimitation", "multivalued": true, "inlined_as_list": true, @@ -5996,6 +6976,7 @@ }, { "name": "confidential_elements", + "slot_uri": "d4d:confidentialElements", "range": "Confidentiality", "multivalued": true, "inlined_as_list": true, @@ -6003,6 +6984,7 @@ }, { "name": "content_warnings", + "slot_uri": "d4d:contentWarnings", "range": "ContentWarning", "multivalued": true, "inlined_as_list": true, @@ -6010,6 +6992,7 @@ }, { "name": "subpopulations", + "slot_uri": "d4d:subpopulations", "range": "Subpopulation", "multivalued": true, "inlined_as_list": true, @@ -6017,6 +7000,7 @@ }, { "name": "sensitive_elements", + "slot_uri": "d4d:sensitiveElements", "range": "SensitiveElement", "multivalued": true, "inlined_as_list": true, @@ -6024,6 +7008,7 @@ }, { "name": "acquisition_methods", + "slot_uri": "d4d:acquisitionMethods", "range": "InstanceAcquisition", "multivalued": true, "inlined_as_list": true, @@ -6031,6 +7016,7 @@ }, { "name": "collection_mechanisms", + "slot_uri": "d4d:collectionMechanisms", "range": "CollectionMechanism", "multivalued": true, "inlined_as_list": true, @@ -6038,6 +7024,7 @@ }, { "name": "sampling_strategies", + "slot_uri": "d4d:samplingStrategies", "range": "SamplingStrategy", "multivalued": true, "inlined_as_list": true, @@ -6045,6 +7032,7 @@ }, { "name": "data_collectors", + "slot_uri": "d4d:dataCollectors", "range": "DataCollector", "multivalued": true, "inlined_as_list": true, @@ -6052,6 +7040,7 @@ }, { "name": "collection_timeframes", + "slot_uri": "d4d:collectionTimeframes", "range": "CollectionTimeframe", "multivalued": true, "inlined_as_list": true, @@ -6060,6 +7049,7 @@ { "name": "missing_data_documentation", "description": "Documentation of missing data patterns and handling strategies.", + "slot_uri": "d4d:missingDataDocumentation", "range": "MissingDataDocumentation", "multivalued": true, 
"inlined_as_list": true, @@ -6068,6 +7058,7 @@ { "name": "raw_data_sources", "description": "Description of raw data sources before preprocessing.", + "slot_uri": "d4d:rawDataSources", "range": "RawDataSource", "multivalued": true, "inlined_as_list": true, @@ -6075,6 +7066,7 @@ }, { "name": "ethical_reviews", + "slot_uri": "d4d:ethicalReviews", "range": "EthicalReview", "multivalued": true, "inlined_as_list": true, @@ -6082,6 +7074,7 @@ }, { "name": "data_protection_impacts", + "slot_uri": "d4d:dataProtectionImpacts", "range": "DataProtectionImpact", "multivalued": true, "inlined_as_list": true, @@ -6090,6 +7083,7 @@ { "name": "human_subject_research", "description": "Information about whether dataset involves human subjects research, including IRB approval, ethics review, and regulatory compliance.", + "slot_uri": "d4d:humanSubjectResearch", "range": "HumanSubjectResearch", "inlined": true, "@type": "SlotDefinition" @@ -6097,20 +7091,41 @@ { "name": "informed_consent", "description": "Details about informed consent procedures, including consent type, documentation, and withdrawal mechanisms.", + "slot_uri": "d4d:informedConsent", "range": "InformedConsent", "multivalued": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { - "name": "vulnerable_populations", + "name": "at_risk_populations", "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures.", - "range": "VulnerablePopulations", + "slot_uri": "d4d:atRiskPopulations", + "range": "AtRiskPopulations", "inlined": true, "@type": "SlotDefinition" }, + { + "name": "participant_privacy", + "description": "Information about privacy protections and anonymization procedures for human research participants.", + "slot_uri": "d4d:participantPrivacy", + "range": "ParticipantPrivacy", + "multivalued": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, + { + "name": "participant_compensation", + 
"description": "Information about compensation or incentives provided to human research participants.", + "slot_uri": "d4d:participantCompensation", + "range": "HumanSubjectCompensation", + "multivalued": true, + "inlined_as_list": true, + "@type": "SlotDefinition" + }, { "name": "preprocessing_strategies", + "slot_uri": "d4d:preprocessingStrategies", "range": "PreprocessingStrategy", "multivalued": true, "inlined_as_list": true, @@ -6118,6 +7133,7 @@ }, { "name": "cleaning_strategies", + "slot_uri": "d4d:cleaningStrategies", "range": "CleaningStrategy", "multivalued": true, "inlined_as_list": true, @@ -6125,6 +7141,7 @@ }, { "name": "labeling_strategies", + "slot_uri": "d4d:labelingStrategies", "range": "LabelingStrategy", "multivalued": true, "inlined_as_list": true, @@ -6132,6 +7149,7 @@ }, { "name": "raw_sources", + "slot_uri": "d4d:rawSources", "range": "RawData", "multivalued": true, "inlined_as_list": true, @@ -6140,6 +7158,7 @@ { "name": "imputation_protocols", "description": "Data imputation methodology and techniques.", + "slot_uri": "d4d:imputation_protocols", "range": "ImputationProtocol", "multivalued": true, "inlined_as_list": true, @@ -6148,6 +7167,7 @@ { "name": "annotation_analyses", "description": "Analysis of annotation quality and inter-annotator agreement.", + "slot_uri": "d4d:annotation_analyses", "range": "AnnotationAnalysis", "multivalued": true, "inlined_as_list": true, @@ -6163,6 +7183,7 @@ }, { "name": "existing_uses", + "slot_uri": "d4d:existingUses", "range": "ExistingUse", "multivalued": true, "inlined_as_list": true, @@ -6170,6 +7191,7 @@ }, { "name": "use_repository", + "slot_uri": "d4d:useRepository", "range": "UseRepository", "multivalued": true, "inlined_as_list": true, @@ -6177,6 +7199,7 @@ }, { "name": "other_tasks", + "slot_uri": "d4d:otherTasks", "range": "OtherTask", "multivalued": true, "inlined_as_list": true, @@ -6184,6 +7207,7 @@ }, { "name": "future_use_impacts", + "slot_uri": "d4d:futureUseImpacts", "range": 
"FutureUseImpact", "multivalued": true, "inlined_as_list": true, @@ -6191,6 +7215,7 @@ }, { "name": "discouraged_uses", + "slot_uri": "d4d:discouragedUses", "range": "DiscouragedUse", "multivalued": true, "inlined_as_list": true, @@ -6199,6 +7224,7 @@ { "name": "intended_uses", "description": "Explicit intended and recommended uses for this dataset. Complements future_use_impacts by focusing on positive applications.", + "slot_uri": "d4d:intendedUses", "range": "IntendedUse", "multivalued": true, "inlined_as_list": true, @@ -6207,6 +7233,7 @@ { "name": "prohibited_uses", "description": "Explicitly prohibited or forbidden uses for this dataset. Stronger than discouraged_uses - these are not permitted.", + "slot_uri": "d4d:prohibitedUses", "range": "ProhibitedUse", "multivalued": true, "inlined_as_list": true, @@ -6214,6 +7241,7 @@ }, { "name": "distribution_formats", + "slot_uri": "d4d:distributionFormats", "range": "DistributionFormat", "multivalued": true, "inlined_as_list": true, @@ -6221,6 +7249,7 @@ }, { "name": "distribution_dates", + "slot_uri": "d4d:distributionDates", "range": "DistributionDate", "multivalued": true, "inlined_as_list": true, @@ -6228,24 +7257,28 @@ }, { "name": "license_and_use_terms", + "slot_uri": "schema:license", "range": "LicenseAndUseTerms", "inlined": true, "@type": "SlotDefinition" }, { "name": "ip_restrictions", + "slot_uri": "d4d:ipRestrictions", "range": "IPRestrictions", "inlined": true, "@type": "SlotDefinition" }, { "name": "regulatory_restrictions", + "slot_uri": "d4d:regulatoryRestrictions", "range": "ExportControlRegulatoryRestrictions", "inlined": true, "@type": "SlotDefinition" }, { "name": "maintainers", + "slot_uri": "d4d:maintainers", "range": "Maintainer", "multivalued": true, "inlined_as_list": true, @@ -6253,6 +7286,7 @@ }, { "name": "errata", + "slot_uri": "d4d:errata", "range": "Erratum", "multivalued": true, "inlined_as_list": true, @@ -6260,24 +7294,28 @@ }, { "name": "updates", + "slot_uri": "d4d:updates", 
"range": "UpdatePlan", "inlined": true, "@type": "SlotDefinition" }, { "name": "retention_limit", + "slot_uri": "d4d:retentionLimit", "range": "RetentionLimits", "inlined": true, "@type": "SlotDefinition" }, { "name": "version_access", + "slot_uri": "dcat:accessURL", "range": "VersionAccess", "inlined": true, "@type": "SlotDefinition" }, { "name": "extension_mechanism", + "slot_uri": "d4d:extensionMechanism", "range": "ExtensionMechanism", "inlined": true, "@type": "SlotDefinition" @@ -6296,12 +7334,14 @@ }, { "name": "is_deidentified", + "slot_uri": "d4d:isDeidentified", "range": "Deidentification", "inlined": true, "@type": "SlotDefinition" }, { "name": "is_tabular", + "slot_uri": "schema:encodingFormat", "range": "boolean", "@type": "SlotDefinition" }, @@ -6330,6 +7370,7 @@ { "name": "related_datasets", "description": "Related datasets with typed relationships (e.g., supplements, derives from, is version of). Use DatasetRelationship class to specify relationship types.", + "slot_uri": "schema:isRelatedTo", "range": "DatasetRelationship", "multivalued": true, "inlined_as_list": true, @@ -6369,17 +7410,11 @@ "title", "version", "was_derived_from", - "bytes", - "dialect", - "encoding", - "format", - "hash", - "md5", - "media_type", - "path", - "sha256", "Dataset_external_resources", "Dataset_resources", + "dataset__file_collections", + "dataset__total_file_count", + "dataset__total_size_bytes", "dataset__purposes", "dataset__tasks", "dataset__addressing_gaps", @@ -6405,7 +7440,9 @@ "dataset__data_protection_impacts", "dataset__human_subject_research", "dataset__informed_consent", - "dataset__vulnerable_populations", + "dataset__at_risk_populations", + "dataset__participant_privacy", + "dataset__participant_compensation", "dataset__preprocessing_strategies", "dataset__cleaning_strategies", "dataset__labeling_strategies", @@ -6554,6 +7591,7 @@ { "name": "used_software", "description": "What software was used as part of this dataset property?", + "slot_uri": 
"d4d:usedSoftware", "range": "Software", "multivalued": true, "inlined_as_list": true, @@ -6849,6 +7887,7 @@ { "name": "credit_roles", "description": "Contributor roles using the CRediT (Contributor Roles Taxonomy) for the principal investigator or creator team. Specifies the specific contributions made to this dataset (e.g., Conceptualization, Data Curation, Methodology). Note: roles are specified here rather than on Person directly, since the same person may have different roles across different datasets.", + "slot_uri": "d4d:creditRoles", "range": "CRediTRoleEnum", "multivalued": true, "@type": "SlotDefinition" @@ -6992,6 +8031,7 @@ { "name": "label", "description": "Is there a label or target associated with each instance?\n", + "slot_uri": "d4d:hasLabel", "range": "boolean", "@type": "SlotDefinition" }, @@ -7005,6 +8045,7 @@ { "name": "sampling_strategies", "description": "References to one or more SamplingStrategy objects.\n", + "slot_uri": "d4d:samplingStrategies", "range": "SamplingStrategy", "multivalued": true, "@type": "SlotDefinition" @@ -7012,6 +8053,7 @@ { "name": "missing_information", "description": "References to one or more MissingInfo objects describing missing data.\n", + "slot_uri": "d4d:missingInformation", "range": "MissingInfo", "multivalued": true, "@type": "SlotDefinition" @@ -7044,6 +8086,7 @@ { "name": "is_sample", "description": "Indicates whether it is a sample of a larger set.", + "slot_uri": "d4d:isSample", "range": "boolean", "multivalued": true, "@type": "SlotDefinition" @@ -7051,6 +8094,7 @@ { "name": "is_random", "description": "Indicates whether the sample is random.", + "slot_uri": "d4d:isRandom", "range": "boolean", "multivalued": true, "@type": "SlotDefinition" @@ -7058,6 +8102,7 @@ { "name": "source_data", "description": "Description of the larger set from which the sample was drawn, if any.\n", + "slot_uri": "d4d:sourceData", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7065,6 +8110,7 @@ { "name": 
"is_representative", "description": "Indicates whether the sample is representative of the larger set.\n", + "slot_uri": "d4d:isRepresentative", "range": "boolean", "multivalued": true, "@type": "SlotDefinition" @@ -7072,6 +8118,7 @@ { "name": "representative_verification", "description": "Explanation of how representativeness was validated or verified.\n", + "slot_uri": "schema:description", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7079,6 +8126,7 @@ { "name": "why_not_representative", "description": "Explanation of why the sample is not representative, if applicable.\n", + "slot_uri": "d4d:whyNotRepresentative", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7086,6 +8134,7 @@ { "name": "strategies", "description": "Description of the sampling strategy (deterministic, probabilistic, etc.).\n", + "slot_uri": "d4d:strategies", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7235,6 +8284,7 @@ { "name": "bias_type", "description": "The type of bias identified, using standardized categories from the Artificial Intelligence Ontology (AIO).\n", + "slot_uri": "d4d:biasType", "range": "BiasTypeEnum", "@type": "SlotDefinition" }, @@ -7248,12 +8298,14 @@ { "name": "mitigation_strategy", "description": "Steps taken or recommended to mitigate this bias.\n", + "slot_uri": "d4d:mitigation_strategy", "range": "string", "@type": "SlotDefinition" }, { "name": "affected_subsets", "description": "Specific subsets or features of the dataset affected by this bias.\n", + "slot_uri": "d4d:affectedSubsets", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7286,6 +8338,7 @@ { "name": "limitation_type", "description": "Category of limitation (e.g., scope, coverage, temporal, methodological).\n", + "slot_uri": "d4d:limitationType", "range": "LimitationTypeEnum", "@type": "SlotDefinition" }, @@ -7299,12 +8352,14 @@ { "name": "scope_impact", "description": "How this limitation affects the scope or 
applicability of the dataset.\n", + "slot_uri": "d4d:scopeImpact", "range": "string", "@type": "SlotDefinition" }, { "name": "recommended_mitigation", "description": "Recommended approaches for users to address this limitation.\n", + "slot_uri": "d4d:recommendedMitigation", "range": "string", "@type": "SlotDefinition" } @@ -7341,6 +8396,7 @@ { "name": "archival", "description": "Indication whether official archival versions of external resources are included.\n", + "slot_uri": "schema:archivedAt", "range": "boolean", "multivalued": true, "@type": "SlotDefinition" @@ -7376,6 +8432,7 @@ { "name": "confidential_elements_present", "description": "Indicates whether any confidential data elements are present.", + "slot_uri": "d4d:confidential_elements_present", "range": "boolean", "@type": "SlotDefinition" }, @@ -7410,6 +8467,7 @@ { "name": "content_warnings_present", "description": "Indicates whether any content warnings are needed.", + "slot_uri": "d4d:content_warnings_present", "range": "boolean", "@type": "SlotDefinition" }, @@ -7444,6 +8502,7 @@ { "name": "subpopulation_elements_present", "description": "Indicates whether any subpopulations are explicitly identified.", + "slot_uri": "d4d:subpopulationElementsPresent", "range": "boolean", "@type": "SlotDefinition" }, @@ -7486,6 +8545,7 @@ { "name": "identifiable_elements_present", "description": "Indicates whether data subjects can be identified.", + "slot_uri": "d4d:identifiableElementsPresent", "range": "boolean", "@type": "SlotDefinition" }, @@ -7498,6 +8558,7 @@ { "name": "identifiers_removed", "description": "List of identifier types removed during de-identification.", + "slot_uri": "schema:identifier", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7536,6 +8597,7 @@ { "name": "sensitive_elements_present", "description": "Indicates whether sensitive data elements are present.", + "slot_uri": "d4d:sensitive_elements_present", "range": "boolean", "@type": "SlotDefinition" }, @@ -7570,6 
+8632,7 @@ { "name": "target_dataset", "description": "The dataset that this relationship points to. Can be specified by identifier, URL, or Dataset object.", + "slot_uri": "schema:identifier", "range": "string", "required": true, "@type": "SlotDefinition" @@ -7577,6 +8640,7 @@ { "name": "relationship_type", "description": "The type of relationship (e.g., derives_from, supplements, is_version_of). Uses DatasetRelationshipTypeEnum for standardized relationship types.", + "slot_uri": "schema:additionalType", "range": "DatasetRelationshipTypeEnum", "required": true, "@type": "SlotDefinition" @@ -7613,24 +8677,28 @@ { "name": "was_directly_observed", "description": "Whether the data was directly observed", + "slot_uri": "d4d:wasDirectlyObserved", "range": "boolean", "@type": "SlotDefinition" }, { "name": "was_reported_by_subjects", "description": "Whether the data was reported directly by the subjects themselves", + "slot_uri": "d4d:wasReportedBySubjects", "range": "boolean", "@type": "SlotDefinition" }, { "name": "was_inferred_derived", "description": "Whether the data was inferred or derived from other data", + "slot_uri": "d4d:wasInferred", "range": "boolean", "@type": "SlotDefinition" }, { "name": "was_validated_verified", "description": "Whether the data was validated or verified in any way", + "slot_uri": "d4d:wasValidated", "range": "boolean", "@type": "SlotDefinition" }, @@ -7695,6 +8763,7 @@ { "name": "role", "description": "Role of the data collector (e.g., researcher, crowdworker)", + "slot_uri": "schema:roleName", "range": "string", "@type": "SlotDefinition" }, @@ -7733,12 +8802,14 @@ { "name": "start_date", "description": "Start date of data collection", + "slot_uri": "schema:startDate", "range": "date", "@type": "SlotDefinition" }, { "name": "end_date", "description": "End date of data collection", + "slot_uri": "schema:endDate", "range": "date", "@type": "SlotDefinition" }, @@ -7773,6 +8844,7 @@ { "name": "is_direct", "description": "Whether collection 
was direct from individuals", + "slot_uri": "d4d:isDirect", "range": "boolean", "@type": "SlotDefinition" }, @@ -7811,6 +8883,7 @@ { "name": "missing_data_patterns", "description": "Description of patterns in missing data (e.g., missing completely at random, missing at random, missing not at random).\n", + "slot_uri": "d4d:missingDataPatterns", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7818,6 +8891,7 @@ { "name": "missing_data_causes", "description": "Known or suspected causes of missing data (e.g., sensor failures, participant dropout, privacy constraints).\n", + "slot_uri": "d4d:missingDataCauses", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7825,6 +8899,7 @@ { "name": "handling_strategy", "description": "Strategy used to handle missing data (e.g., deletion, imputation, flagging, multiple imputation).\n", + "slot_uri": "d4d:handlingStrategy", "range": "string", "@type": "SlotDefinition" } @@ -7856,6 +8931,7 @@ { "name": "source_description", "description": "Detailed description of where raw data comes from (e.g., sensors, databases, web APIs, manual collection).\n", + "slot_uri": "dcterms:description", "range": "string", "required": true, "@type": "SlotDefinition" @@ -7863,6 +8939,7 @@ { "name": "source_type", "description": "Type of raw source (sensor, database, user input, web scraping, etc.).\n", + "slot_uri": "dcterms:type", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7870,12 +8947,14 @@ { "name": "access_details", "description": "Information on how to access or retrieve the raw source data.\n", + "slot_uri": "d4d:accessDetails", "range": "string", "@type": "SlotDefinition" }, { "name": "raw_data_format", "description": "Format of the raw data before any preprocessing.\n", + "slot_uri": "d4d:rawDataFormat", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7980,6 +9059,7 @@ "exact_mappings": [ "rai:dataAnnotationProtocol" ], + "slot_uri": 
"d4d:dataAnnotationProtocol", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -7990,6 +9070,7 @@ "exact_mappings": [ "rai:annotationsPerItem" ], + "slot_uri": "d4d:annotationsPerItem", "range": "integer", "@type": "SlotDefinition" }, @@ -8006,6 +9087,7 @@ "exact_mappings": [ "rai:annotatorDemographics" ], + "slot_uri": "d4d:annotatorDemographics", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8041,6 +9123,7 @@ { "name": "access_url", "description": "URL or access point for the raw data.", + "slot_uri": "dcat:accessURL", "range": "uri", "@type": "SlotDefinition" }, @@ -8080,6 +9163,7 @@ { "name": "imputation_method", "description": "Specific imputation technique used (mean, median, mode, forward fill, backward fill, interpolation, model-based imputation, etc.).\n", + "slot_uri": "d4d:imputation_method", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8087,6 +9171,7 @@ { "name": "imputed_fields", "description": "Fields or columns where imputation was applied.\n", + "slot_uri": "d4d:imputed_fields", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8094,12 +9179,14 @@ { "name": "imputation_rationale", "description": "Justification for the imputation approach chosen, including assumptions made about missing data mechanisms.\n", + "slot_uri": "d4d:imputation_rationale", "range": "string", "@type": "SlotDefinition" }, { "name": "imputation_validation", "description": "Methods used to validate imputation quality (if any).\n", + "slot_uri": "d4d:imputation_validation", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8133,24 +9220,28 @@ { "name": "inter_annotator_agreement_score", "description": "Measured agreement between annotators (e.g., Cohen's kappa value, Fleiss' kappa, Krippendorff's alpha).\n", + "slot_uri": "d4d:interAnnotatorAgreementScore", "range": "float", "@type": "SlotDefinition" }, { "name": "agreement_metric", "description": "Type of agreement metric 
used (Cohen's kappa, Fleiss' kappa, Krippendorff's alpha, percentage agreement, etc.).\n", + "slot_uri": "d4d:agreementMetric", "range": "string", "@type": "SlotDefinition" }, { "name": "analysis_method", "description": "Methodology used to assess annotation quality and resolve disagreements.\n", + "slot_uri": "d4d:analysisMethod", "range": "string", "@type": "SlotDefinition" }, { "name": "disagreement_patterns", "description": "Systematic patterns in annotator disagreements (e.g., by demographic group, annotation difficulty, task type).\n", + "slot_uri": "d4d:disagreementPatterns", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8158,6 +9249,7 @@ { "name": "annotation_quality_details", "description": "Additional details on annotation quality assessment and findings.\n", + "slot_uri": "d4d:annotationQualityDetails", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8189,6 +9281,7 @@ { "name": "tools", "description": "List of automated annotation tools with their versions. Format each entry as \"ToolName version\" (e.g., \"spaCy 3.5.0\", \"NLTK 3.8\", \"GPT-4 turbo\"). Use \"unknown\" for version if not available (e.g., \"Custom NER Model unknown\").\n", + "slot_uri": "schema:name", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8196,6 +9289,7 @@ { "name": "tool_descriptions", "description": "Descriptions of what each tool does in the annotation process and what types of annotations it produces. Should correspond to the tools list.\n", + "slot_uri": "d4d:toolDescriptions", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8203,6 +9297,7 @@ { "name": "tool_accuracy", "description": "Known accuracy or performance metrics for the automated tools (if available). 
Include metric name and value (e.g., \"spaCy F1: 0.95\", \"GPT-4 Accuracy: 92%\").\n", + "slot_uri": "d4d:toolAccuracy", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8229,6 +9324,7 @@ { "name": "examples", "description": "List of examples of known/previous uses of the dataset.", + "slot_uri": "schema:example", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8391,6 +9487,7 @@ { "name": "use_category", "description": "Category of intended use (e.g., research, clinical, educational, commercial, policy).", + "slot_uri": "d4d:useCategory", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8417,6 +9514,7 @@ { "name": "prohibition_reason", "description": "Reason why this use is prohibited (e.g., license restriction, ethical concern, privacy risk, legal constraint).", + "slot_uri": "d4d:prohibitionReason", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8559,6 +9657,7 @@ { "name": "erratum_url", "description": "URL or access point for the erratum.", + "slot_uri": "dcat:accessURL", "range": "uri", "@type": "SlotDefinition" }, @@ -8596,6 +9695,7 @@ { "name": "frequency", "description": "How often updates are planned (e.g., quarterly, annually).", + "slot_uri": "d4d:frequency", "range": "string", "@type": "SlotDefinition" }, @@ -8630,6 +9730,7 @@ { "name": "retention_period", "description": "Time period for data retention.", + "slot_uri": "d4d:retentionPeriod", "range": "string", "@type": "SlotDefinition" }, @@ -8665,12 +9766,14 @@ { "name": "latest_version_doi", "description": "DOI or URL of the latest dataset version.", + "slot_uri": "schema:identifier", "range": "string", "@type": "SlotDefinition" }, { "name": "versions_available", "description": "List of available versions with metadata.", + "slot_uri": "d4d:versionsAvailable", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8706,6 +9809,7 @@ { "name": "contribution_url", "description": "URL for contribution 
guidelines or process.", + "slot_uri": "dcat:landingPage", "range": "uri", "@type": "SlotDefinition" }, @@ -8900,12 +10004,14 @@ { "name": "involves_human_subjects", "description": "Does this dataset involve human subjects research?", + "slot_uri": "d4d:involvesHumanSubjects", "range": "boolean", "@type": "SlotDefinition" }, { "name": "irb_approval", "description": "Was Institutional Review Board (IRB) approval obtained? Include approval number and institution if applicable.\n", + "slot_uri": "d4d:irbApproval", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8913,6 +10019,7 @@ { "name": "ethics_review_board", "description": "What ethics review board(s) reviewed this research? Include institution names and approval details.\n", + "slot_uri": "d4d:ethicsReviewBoard", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8920,6 +10027,7 @@ { "name": "special_populations", "description": "Does the research involve any special populations that require additional protections (e.g., minors, pregnant women, prisoners)?\n", + "slot_uri": "d4d:specialPopulations", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8927,6 +10035,7 @@ { "name": "regulatory_compliance", "description": "What regulatory frameworks govern this human subjects research (e.g., 45 CFR 46, HIPAA)?\n", + "slot_uri": "d4d:regulatoryCompliance", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8957,12 +10066,14 @@ { "name": "consent_obtained", "description": "Was informed consent obtained from all participants?", + "slot_uri": "d4d:consentObtained", "range": "boolean", "@type": "SlotDefinition" }, { "name": "consent_type", "description": "What type of consent was obtained (e.g., written, verbal, electronic, implied through participation)?\n", + "slot_uri": "d4d:consentType", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8970,6 +10081,7 @@ { "name": "consent_documentation", "description": "How is consent 
documented? Include references to consent forms or procedures used.\n", + "slot_uri": "d4d:consentDocumentation", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8977,6 +10089,7 @@ { "name": "withdrawal_mechanism", "description": "How can participants withdraw their consent? What procedures are in place for data deletion upon withdrawal?\n", + "slot_uri": "d4d:withdrawalMechanism", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8984,6 +10097,7 @@ { "name": "consent_scope", "description": "What specific uses did participants consent to? Are there limitations on data use based on consent?\n", + "slot_uri": "d4d:consentScope", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -8993,8 +10107,115 @@ "@type": "ClassDefinition" }, { - "name": "VulnerablePopulations", - "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#VulnerablePopulations", + "name": "ParticipantPrivacy", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#ParticipantPrivacy", + "description": "Information about privacy protections and anonymization procedures for human research participants.\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "is_a": "DatasetProperty", + "slots": [ + "datasetProperty__id", + "datasetProperty__name", + "datasetProperty__description", + "datasetProperty__used_software", + "participantPrivacy__anonymization_method", + "participantPrivacy__reidentification_risk", + "participantPrivacy__privacy_techniques", + "participantPrivacy__data_linkage" + ], + "slot_usage": {}, + "attributes": [ + { + "name": "anonymization_method", + "description": "What methods were used to anonymize or de-identify participant data? 
Include technical details of privacy-preserving techniques.\n", + "slot_uri": "d4d:anonymizationMethod", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "reidentification_risk", + "description": "What is the assessed risk of re-identification? What measures were taken to minimize this risk?\n", + "slot_uri": "d4d:reidentificationRisk", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "privacy_techniques", + "description": "What privacy-preserving techniques were applied (e.g., differential privacy, k-anonymity, data masking)?\n", + "slot_uri": "d4d:privacyTechniques", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "data_linkage", + "description": "Can this dataset be linked to other datasets in ways that might compromise participant privacy?\n", + "slot_uri": "d4d:dataLinkage", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + } + ], + "class_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#ParticipantPrivacy", + "@type": "ClassDefinition" + }, + { + "name": "HumanSubjectCompensation", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#HumanSubjectCompensation", + "description": "Information about compensation or incentives provided to human research participants.\n", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", + "is_a": "DatasetProperty", + "slots": [ + "datasetProperty__id", + "datasetProperty__name", + "datasetProperty__description", + "datasetProperty__used_software", + "humanSubjectCompensation__compensation_provided", + "humanSubjectCompensation__compensation_type", + "humanSubjectCompensation__compensation_amount", + "humanSubjectCompensation__compensation_rationale" + ], + "slot_usage": {}, + "attributes": [ + { + "name": "compensation_provided", + "description": "Were participants compensated for their participation?", + "slot_uri": 
"d4d:compensationProvided", + "range": "boolean", + "@type": "SlotDefinition" + }, + { + "name": "compensation_type", + "description": "What type of compensation was provided (e.g., monetary payment, gift cards, course credit, other incentives)?\n", + "slot_uri": "d4d:compensationType", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "compensation_amount", + "description": "What was the amount or value of compensation provided? Include currency or equivalent value.\n", + "slot_uri": "d4d:compensationAmount", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "compensation_rationale", + "description": "What was the rationale for the compensation structure? How was the amount determined to be appropriate?\n", + "slot_uri": "d4d:compensationRationale", + "range": "string", + "multivalued": true, + "@type": "SlotDefinition" + } + ], + "class_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#HumanSubjectCompensation", + "@type": "ClassDefinition" + }, + { + "name": "AtRiskPopulations", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#AtRiskPopulations", "description": "Information about protections for at-risk populations in human subjects research.\n", "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/human", "is_a": "DatasetProperty", @@ -9003,22 +10224,24 @@ "datasetProperty__name", "datasetProperty__description", "datasetProperty__used_software", - "vulnerablePopulations__vulnerable_groups_included", - "vulnerablePopulations__special_protections", - "vulnerablePopulations__assent_procedures", - "vulnerablePopulations__guardian_consent" + "atRiskPopulations__at_risk_groups_included", + "atRiskPopulations__special_protections", + "atRiskPopulations__assent_procedures", + "atRiskPopulations__guardian_consent" ], "slot_usage": {}, "attributes": [ { - "name": "vulnerable_groups_included", + "name": "at_risk_groups_included", "description": "Are 
any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)?\n", + "slot_uri": "d4d:atRiskGroupsIncluded", "range": "boolean", "@type": "SlotDefinition" }, { "name": "special_protections", "description": "What additional protections were implemented for at-risk populations? Include safeguards, modified procedures, or additional oversight.\n", + "slot_uri": "d4d:specialProtections", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -9026,6 +10249,7 @@ { "name": "assent_procedures", "description": "For research involving minors, what assent procedures were used? How was developmentally appropriate assent obtained?\n", + "slot_uri": "d4d:assentProcedures", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -9033,12 +10257,13 @@ { "name": "guardian_consent", "description": "For participants unable to provide their own consent, how was guardian or surrogate consent obtained?\n", + "slot_uri": "d4d:guardianConsent", "range": "string", "multivalued": true, "@type": "SlotDefinition" } ], - "class_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#VulnerablePopulations", + "class_uri": "https://w3id.org/bridge2ai/data-sheets-schema/human#AtRiskPopulations", "@type": "ClassDefinition" }, { @@ -9157,12 +10382,14 @@ { "name": "hipaa_compliant", "description": "Indicates compliance with the Health Insurance Portability and Accountability Act (HIPAA). 
HIPAA applies to protected health information in the United States.", + "slot_uri": "d4d:hipaaCompliant", "range": "ComplianceStatusEnum", "@type": "SlotDefinition" }, { "name": "other_compliance", "description": "Other regulatory compliance frameworks applicable to this dataset (e.g., CCPA, PIPEDA, industry-specific regulations).", + "slot_uri": "d4d:otherCompliance", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -9170,6 +10397,7 @@ { "name": "confidentiality_level", "description": "Confidentiality classification of the dataset indicating level of access restrictions and sensitivity.", + "slot_uri": "d4d:confidentialityLevel", "range": "ConfidentialityLevelEnum", "@type": "SlotDefinition" }, @@ -9256,6 +10484,7 @@ { "name": "missing_value_code", "description": "Code(s) used to represent missing values for this variable. Examples: \"NA\", \"-999\", \"null\", \"\". Multiple codes may be specified.", + "slot_uri": "d4d:missingValueCode", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -9300,12 +10529,14 @@ { "name": "is_sensitive", "description": "Indicates whether this variable contains sensitive information (e.g., personal data, protected health information).", + "slot_uri": "d4d:isSensitive", "range": "boolean", "@type": "SlotDefinition" }, { "name": "precision", "description": "The precision or number of decimal places for numeric variables.", + "slot_uri": "schema:valuePrecision", "range": "integer", "@type": "SlotDefinition" }, @@ -9319,12 +10550,14 @@ { "name": "derivation", "description": "Description of how this variable was derived or calculated from other variables, if applicable.", + "slot_uri": "dcterms:provenance", "range": "string", "@type": "SlotDefinition" }, { "name": "quality_notes", "description": "Notes about data quality, reliability, or known issues specific to this variable.", + "slot_uri": "dcterms:description", "range": "string", "multivalued": true, "@type": "SlotDefinition" @@ -9332,13 +10565,157 
@@ ], "class_uri": "http://schema.org/PropertyValue", "@type": "ClassDefinition" + }, + { + "name": "File", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection/File", + "description": "A single file within a dataset or file collection. Represents an individual data file, code file, documentation file, etc. Maps to RO-Crate File entities.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "aliases": [ + "data file", + "file", + "file object" + ], + "mappings": [ + "schema:MediaObject" + ], + "exact_mappings": [ + "schema:DigitalDocument" + ], + "is_a": "Information", + "slots": [ + "namedThing__id", + "namedThing__name", + "namedThing__description", + "conforms_to", + "conforms_to_class", + "conforms_to_schema", + "created_by", + "created_on", + "doi", + "download_url", + "issued", + "keywords", + "language", + "last_updated_on", + "license", + "modified_by", + "page", + "publisher", + "status", + "title", + "version", + "was_derived_from", + "bytes", + "path", + "format", + "encoding", + "compression", + "media_type", + "hash", + "md5", + "sha256", + "dialect", + "file__file_type" + ], + "slot_usage": {}, + "attributes": [ + { + "name": "file_type", + "description": "Semantic type or purpose of this file (e.g., data_file, code_file, documentation_file, metadata_file).", + "slot_uri": "d4d:fileType", + "range": "FileTypeEnum", + "@type": "SlotDefinition" + } + ], + "class_uri": "http://schema.org/MediaObject", + "@type": "ClassDefinition" + }, + { + "name": "FileCollection", + "definition_uri": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection/FileCollection", + "description": "A collection of files with shared characteristics (format, purpose, structure). Represents a logical grouping of related files within a dataset, such as all training data files, all image files, or all raw data files. 
Maps to RO-Crate Dataset entities via schema:hasPart relationships.", + "from_schema": "https://w3id.org/bridge2ai/data-sheets-schema/file-collection", + "aliases": [ + "file collection", + "data files", + "file group" + ], + "mappings": [ + "dcat:Dataset" + ], + "exact_mappings": [ + "schema:Dataset" + ], + "close_mappings": [ + "dcat:Distribution" + ], + "is_a": "Information", + "slots": [ + "namedThing__id", + "namedThing__name", + "namedThing__description", + "conforms_to", + "conforms_to_class", + "conforms_to_schema", + "created_by", + "created_on", + "doi", + "download_url", + "issued", + "keywords", + "language", + "last_updated_on", + "license", + "modified_by", + "page", + "publisher", + "status", + "title", + "version", + "was_derived_from", + "FileCollection_path", + "FileCollection_compression", + "FileCollection_external_resources", + "FileCollection_resources", + "fileCollection__collection_type", + "fileCollection__file_count", + "fileCollection__total_bytes" + ], + "slot_usage": {}, + "attributes": [ + { + "name": "collection_type", + "description": "Type(s) of content in this file collection. 
A collection may have multiple types, for example a collection containing both raw_data and documentation files would have both types listed.", + "slot_uri": "d4d:collectionType", + "range": "FileCollectionTypeEnum", + "multivalued": true, + "@type": "SlotDefinition" + }, + { + "name": "file_count", + "description": "Number of files in this collection.", + "slot_uri": "d4d:fileCount", + "range": "integer", + "@type": "SlotDefinition" + }, + { + "name": "total_bytes", + "description": "Total size of all files in bytes.", + "slot_uri": "dcat:byteSize", + "range": "integer", + "@type": "SlotDefinition" + } + ], + "class_uri": "http://www.w3.org/ns/dcat#Dataset", + "@type": "ClassDefinition" } ], "metamodel_version": "1.7.0", "source_file": "data_sheets_schema.yaml", - "source_file_date": "2026-03-09T13:04:46", - "source_file_size": 14675, - "generation_date": "2026-03-09T13:05:09", + "source_file_date": "2026-03-26T22:44:02", + "source_file_size": 18530, + "generation_date": "2026-04-06T21:13:16", "@type": "SchemaDefinition", "@context": [ "project/jsonld/data_sheets_schema.context.jsonld", @@ -9355,6 +10732,7 @@ "D4D_Human.context.jsonld", "D4D_Data_Governance.context.jsonld", "D4D_Variables.context.jsonld", + "D4D_FileCollection.context.jsonld", { "@base": "https://w3id.org/bridge2ai/data-sheets-schema/" } diff --git a/project/jsonschema/data_sheets_schema.schema.json b/project/jsonschema/data_sheets_schema.schema.json index 2c729990..b348955a 100644 --- a/project/jsonschema/data_sheets_schema.schema.json +++ b/project/jsonschema/data_sheets_schema.schema.json @@ -126,6 +126,82 @@ "title": "AnnotationAnalysis", "type": "object" }, + "AtRiskPopulations": { + "additionalProperties": false, + "description": "Information about protections for at-risk populations in human subjects research.", + "properties": { + "assent_procedures": { + "description": "For research involving minors, what assent procedures were used? 
How was developmentally appropriate assent obtained?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "at_risk_groups_included": { + "description": "Are any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)?\n", + "type": [ + "boolean", + "null" + ] + }, + "description": { + "description": "A human-readable description for this property.", + "type": [ + "string", + "null" + ] + }, + "guardian_consent": { + "description": "For participants unable to provide their own consent, how was guardian or surrogate consent obtained?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "id": { + "description": "An optional identifier for this property.", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "A human-readable name for this property.", + "type": [ + "string", + "null" + ] + }, + "special_protections": { + "description": "What additional protections were implemented for at-risk populations? Include safeguards, modified procedures, or additional oversight.\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "used_software": { + "description": "What software was used as part of this dataset property?", + "items": { + "$ref": "#/$defs/Software" + }, + "type": [ + "array", + "null" + ] + } + }, + "title": "AtRiskPopulations", + "type": "object" + }, "BiasTypeEnum": { "description": "Types of bias that may be present in datasets. Values are mapped to the Artificial Intelligence Ontology (AIO) bias taxonomy from BioPortal. 
See https://bioportal.bioontology.org/ontologies/AIO", "enum": [ @@ -909,12 +985,16 @@ "null" ] }, - "bytes": { - "description": "Size of the data in bytes.", - "type": [ - "integer", - "null" - ] + "at_risk_populations": { + "anyOf": [ + { + "$ref": "#/$defs/AtRiskPopulations" + }, + { + "type": "null" + } + ], + "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." }, "citation": { "description": "Recommended citation for this dataset in DataCite or BibTeX format. Provides a standard way to cite the dataset in publications.", @@ -1037,12 +1117,6 @@ "null" ] }, - "dialect": { - "type": [ - "string", - "null" - ] - }, "discouraged_uses": { "items": { "$ref": "#/$defs/DiscouragedUse" @@ -1085,10 +1159,6 @@ "null" ] }, - "encoding": { - "$ref": "#/$defs/EncodingEnum", - "description": "the character encoding of the data" - }, "errata": { "items": { "$ref": "#/$defs/Erratum" @@ -1127,7 +1197,7 @@ ] }, "external_resources": { - "description": "Links or identifiers for external resources. Can be used either as a list of ExternalResource objects (in Dataset) or as a list of URL strings (within ExternalResource class).", + "description": "External resources referenced at the dataset level (e.g., related publications, repositories, documentation). For file-level external resources, use FileCollection.external_resources.", "items": { "$ref": "#/$defs/ExternalResource" }, @@ -1136,9 +1206,15 @@ "null" ] }, - "format": { - "$ref": "#/$defs/FormatEnum", - "description": "The file format, physical medium, or dimensions of a resource. This should be a file extension or MIME type." + "file_collections": { + "description": "Collections of files within this dataset. Each collection represents a logical grouping of files with shared characteristics (e.g., all training data, all image files, all raw data files). 
Maps to nested RO-Crate Dataset entities via schema:hasPart.", + "items": { + "$ref": "#/$defs/FileCollection" + }, + "type": [ + "array", + "null" + ] }, "funders": { "items": { @@ -1158,13 +1234,6 @@ "null" ] }, - "hash": { - "description": "hash of the data", - "type": [ - "string", - "null" - ] - }, "human_subject_research": { "anyOf": [ { @@ -1353,17 +1422,6 @@ "null" ] }, - "md5": { - "description": "md5 hash of the data", - "type": [ - "string", - "null" - ] - }, - "media_type": { - "$ref": "#/$defs/MediaTypeEnum", - "description": "The media type of the data. This should be a MIME type." - }, "missing_data_documentation": { "description": "Documentation of missing data patterns and handling strategies.", "items": { @@ -1412,9 +1470,23 @@ "null" ] }, - "path": { + "participant_compensation": { + "description": "Information about compensation or incentives provided to human research participants.", + "items": { + "$ref": "#/$defs/HumanSubjectCompensation" + }, "type": [ - "string", + "array", + "null" + ] + }, + "participant_privacy": { + "description": "Information about privacy protections and anonymization procedures for human research participants.", + "items": { + "$ref": "#/$defs/ParticipantPrivacy" + }, + "type": [ + "array", "null" ] }, @@ -1492,7 +1564,7 @@ ] }, "resources": { - "description": "Sub-resources or component datasets that are part of this dataset. Allows datasets to contain nested resource structures.", + "description": "Sub-resources or component datasets that are part of this dataset. Note: For file collections, use the file_collections attribute instead.", "items": { "$ref": "#/$defs/Dataset" }, @@ -1529,13 +1601,6 @@ "null" ] }, - "sha256": { - "description": "sha256 hash of the data", - "type": [ - "string", - "null" - ] - }, "status": { "type": [ "string", @@ -1576,6 +1641,20 @@ "null" ] }, + "total_file_count": { + "description": "Total number of files across all file collections in this dataset. 
Can be aggregated from file_collections[].file_count.", + "type": [ + "integer", + "null" + ] + }, + "total_size_bytes": { + "description": "Total size of all files in bytes across all file collections. Can be aggregated from file_collections[].total_bytes.", + "type": [ + "integer", + "null" + ] + }, "updates": { "anyOf": [ { @@ -1621,17 +1700,6 @@ } ] }, - "vulnerable_populations": { - "anyOf": [ - { - "$ref": "#/$defs/VulnerablePopulations" - }, - { - "type": "null" - } - ], - "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." - }, "was_derived_from": { "type": [ "string", @@ -1715,12 +1783,16 @@ "null" ] }, - "bytes": { - "description": "Size of the data in bytes.", - "type": [ - "integer", - "null" - ] + "at_risk_populations": { + "anyOf": [ + { + "$ref": "#/$defs/AtRiskPopulations" + }, + { + "type": "null" + } + ], + "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." }, "citation": { "description": "Recommended citation for this dataset in DataCite or BibTeX format. Provides a standard way to cite the dataset in publications.", @@ -1843,12 +1915,6 @@ "null" ] }, - "dialect": { - "type": [ - "string", - "null" - ] - }, "discouraged_uses": { "items": { "$ref": "#/$defs/DiscouragedUse" @@ -1891,10 +1957,6 @@ "null" ] }, - "encoding": { - "$ref": "#/$defs/EncodingEnum", - "description": "the character encoding of the data" - }, "errata": { "items": { "$ref": "#/$defs/Erratum" @@ -1933,7 +1995,7 @@ ] }, "external_resources": { - "description": "Links or identifiers for external resources. 
Can be used either as a list of ExternalResource objects (in Dataset) or as a list of URL strings (within ExternalResource class).", + "description": "External resources referenced at the dataset level (e.g., related publications, repositories, documentation). For file-level external resources, use FileCollection.external_resources.", "items": { "$ref": "#/$defs/ExternalResource" }, @@ -1942,9 +2004,15 @@ "null" ] }, - "format": { - "$ref": "#/$defs/FormatEnum", - "description": "The file format, physical medium, or dimensions of a resource. This should be a file extension or MIME type." + "file_collections": { + "description": "Collections of files within this dataset. Each collection represents a logical grouping of files with shared characteristics (e.g., all training data, all image files, all raw data files). Maps to nested RO-Crate Dataset entities via schema:hasPart.", + "items": { + "$ref": "#/$defs/FileCollection" + }, + "type": [ + "array", + "null" + ] }, "funders": { "items": { @@ -1964,13 +2032,6 @@ "null" ] }, - "hash": { - "description": "hash of the data", - "type": [ - "string", - "null" - ] - }, "human_subject_research": { "anyOf": [ { @@ -2145,17 +2206,6 @@ "null" ] }, - "md5": { - "description": "md5 hash of the data", - "type": [ - "string", - "null" - ] - }, - "media_type": { - "$ref": "#/$defs/MediaTypeEnum", - "description": "The media type of the data. This should be a MIME type." 
- }, "missing_data_documentation": { "description": "Documentation of missing data patterns and handling strategies.", "items": { @@ -2204,9 +2254,23 @@ "null" ] }, - "path": { + "participant_compensation": { + "description": "Information about compensation or incentives provided to human research participants.", + "items": { + "$ref": "#/$defs/HumanSubjectCompensation" + }, "type": [ - "string", + "array", + "null" + ] + }, + "participant_privacy": { + "description": "Information about privacy protections and anonymization procedures for human research participants.", + "items": { + "$ref": "#/$defs/ParticipantPrivacy" + }, + "type": [ + "array", "null" ] }, @@ -2284,7 +2348,7 @@ ] }, "resources": { - "description": "Sub-resources or component datasets that are part of this dataset. Allows datasets to contain nested resource structures.", + "description": "Sub-resources or component datasets that are part of this dataset. Note: For file collections, use the file_collections attribute instead.", "items": { "$ref": "#/$defs/Dataset" }, @@ -2321,13 +2385,6 @@ "null" ] }, - "sha256": { - "description": "sha256 hash of the data", - "type": [ - "string", - "null" - ] - }, "status": { "type": [ "string", @@ -2368,6 +2425,20 @@ "null" ] }, + "total_file_count": { + "description": "Total number of files across all file collections in this dataset. Can be aggregated from file_collections[].file_count.", + "type": [ + "integer", + "null" + ] + }, + "total_size_bytes": { + "description": "Total size of all files in bytes across all file collections. 
Can be aggregated from file_collections[].total_bytes.", + "type": [ + "integer", + "null" + ] + }, "updates": { "anyOf": [ { @@ -2413,17 +2484,6 @@ } ] }, - "vulnerable_populations": { - "anyOf": [ - { - "$ref": "#/$defs/VulnerablePopulations" - }, - { - "type": "null" - } - ], - "description": "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." - }, "was_derived_from": { "type": [ "string", @@ -2631,7 +2691,7 @@ ] }, "resources": { - "description": "Sub-resources or component datasets. Used in DatasetCollection to contain Dataset objects, and in Dataset to allow nested resource structures.", + "description": "Sub-resources or component items. In DatasetCollection, contains Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, contains nested FileCollection objects. The specific range is defined via slot_usage in each class.", "items": { "$ref": "#/$defs/Dataset" }, @@ -3541,11 +3601,476 @@ "title": "ExternalResource", "type": "object" }, - "FormatDialect": { + "File": { "additionalProperties": false, - "description": "Additional format information for a file", + "description": "A single file within a dataset or file collection. Represents an individual data file, code file, documentation file, etc. Maps to RO-Crate File entities.", "properties": { - "comment_prefix": { + "bytes": { + "description": "Size of the data in bytes.", + "type": [ + "integer", + "null" + ] + }, + "compression": { + "$ref": "#/$defs/CompressionEnum", + "description": "compression format used, if any. 
e.g., gzip, bzip2, zip" + }, + "conforms_to": { + "type": [ + "string", + "null" + ] + }, + "conforms_to_class": { + "type": [ + "string", + "null" + ] + }, + "conforms_to_schema": { + "type": [ + "string", + "null" + ] + }, + "created_by": { + "type": [ + "string", + "null" + ] + }, + "created_on": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "description": { + "description": "A human-readable description for a thing.", + "type": [ + "string", + "null" + ] + }, + "dialect": { + "description": "Specific format dialect or variation (e.g., CSV dialect, JSON-LD profile).", + "type": [ + "string", + "null" + ] + }, + "doi": { + "description": "digital object identifier", + "pattern": "10\\.\\d{4,}\\/.+", + "type": [ + "string", + "null" + ] + }, + "download_url": { + "description": "URL from which the data can be downloaded. This is not the same as the landing page, which is a page that describes the dataset. Rather, this URL points directly to the data itself.", + "type": [ + "string", + "null" + ] + }, + "encoding": { + "$ref": "#/$defs/EncodingEnum", + "description": "the character encoding of the data" + }, + "file_type": { + "$ref": "#/$defs/FileTypeEnum", + "description": "Semantic type or purpose of this file (e.g., data_file, code_file, documentation_file, metadata_file)." + }, + "format": { + "$ref": "#/$defs/FormatEnum", + "description": "The file format, physical medium, or dimensions of a resource. This should be a file extension or MIME type." 
+ }, + "hash": { + "description": "hash of the data", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "A unique identifier for a thing.", + "type": "string" + }, + "issued": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "keywords": { + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "language": { + "description": "language in which the information is expressed", + "type": [ + "string", + "null" + ] + }, + "last_updated_on": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "license": { + "type": [ + "string", + "null" + ] + }, + "md5": { + "description": "md5 hash of the data", + "type": [ + "string", + "null" + ] + }, + "media_type": { + "$ref": "#/$defs/MediaTypeEnum", + "description": "The media type of the data. This should be a MIME type." + }, + "modified_by": { + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "A human-readable name for a thing.", + "type": [ + "string", + "null" + ] + }, + "page": { + "type": [ + "string", + "null" + ] + }, + "path": { + "type": [ + "string", + "null" + ] + }, + "publisher": { + "type": [ + "string", + "null" + ] + }, + "sha256": { + "description": "sha256 hash of the data", + "type": [ + "string", + "null" + ] + }, + "status": { + "type": [ + "string", + "null" + ] + }, + "title": { + "description": "the official title of the element", + "type": [ + "string", + "null" + ] + }, + "version": { + "type": [ + "string", + "null" + ] + }, + "was_derived_from": { + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "id" + ], + "title": "File", + "type": "object" + }, + "FileCollection": { + "additionalProperties": false, + "description": "A collection of files with shared characteristics (format, purpose, structure). Represents a logical grouping of related files within a dataset, such as all training data files, all image files, or all raw data files. 
Maps to RO-Crate Dataset entities via schema:hasPart relationships.", + "properties": { + "collection_type": { + "description": "Type(s) of content in this file collection. A collection may have multiple types, for example a collection containing both raw_data and documentation files would have both types listed.", + "items": { + "$ref": "#/$defs/FileCollectionTypeEnum" + }, + "type": [ + "array", + "null" + ] + }, + "compression": { + "$ref": "#/$defs/CompressionEnum", + "description": "Compression format if the collection is packaged as a compressed archive (e.g., gzip, zip, bzip2). Omit this field for uncompressed collections or purely logical groupings." + }, + "conforms_to": { + "type": [ + "string", + "null" + ] + }, + "conforms_to_class": { + "type": [ + "string", + "null" + ] + }, + "conforms_to_schema": { + "type": [ + "string", + "null" + ] + }, + "created_by": { + "type": [ + "string", + "null" + ] + }, + "created_on": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "description": { + "description": "A human-readable description for a thing.", + "type": [ + "string", + "null" + ] + }, + "doi": { + "description": "digital object identifier", + "pattern": "10\\.\\d{4,}\\/.+", + "type": [ + "string", + "null" + ] + }, + "download_url": { + "description": "URL from which the data can be downloaded. This is not the same as the landing page, which is a page that describes the dataset. 
Rather, this URL points directly to the data itself.", + "type": [ + "string", + "null" + ] + }, + "external_resources": { + "description": "External files or URLs referenced by this file collection.", + "items": { + "$ref": "#/$defs/ExternalResource" + }, + "type": [ + "array", + "null" + ] + }, + "file_count": { + "description": "Number of files in this collection.", + "type": [ + "integer", + "null" + ] + }, + "id": { + "description": "A unique identifier for a thing.", + "type": "string" + }, + "issued": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "keywords": { + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "language": { + "description": "language in which the information is expressed", + "type": [ + "string", + "null" + ] + }, + "last_updated_on": { + "format": "date-time", + "type": [ + "string", + "null" + ] + }, + "license": { + "type": [ + "string", + "null" + ] + }, + "modified_by": { + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "A human-readable name for a thing.", + "type": [ + "string", + "null" + ] + }, + "page": { + "type": [ + "string", + "null" + ] + }, + "path": { + "description": "Path or URL to the FileCollection. May be a directory path, archive file path, or download URL depending on how the collection is distributed.", + "type": [ + "string", + "null" + ] + }, + "publisher": { + "type": [ + "string", + "null" + ] + }, + "resources": { + "description": "Individual files or nested file collections within this collection. 
Allows hierarchical file organization with both File objects and nested FileCollection objects.", + "items": { + "$ref": "#/$defs/Dataset", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string" + } + ] + }, + "type": [ + "array", + "null" + ] + }, + "status": { + "type": [ + "string", + "null" + ] + }, + "title": { + "description": "the official title of the element", + "type": [ + "string", + "null" + ] + }, + "total_bytes": { + "description": "Total size of all files in bytes.", + "type": [ + "integer", + "null" + ] + }, + "version": { + "type": [ + "string", + "null" + ] + }, + "was_derived_from": { + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "id" + ], + "title": "FileCollection", + "type": "object" + }, + "FileCollectionTypeEnum": { + "description": "Types of file collections within datasets.", + "enum": [ + "raw_data", + "processed_data", + "training_split", + "test_split", + "validation_split", + "documentation", + "metadata", + "code", + "supplementary", + "other" + ], + "title": "FileCollectionTypeEnum", + "type": "string" + }, + "FileTypeEnum": { + "description": "Types of individual files within datasets.", + "enum": [ + "data_file", + "code_file", + "documentation_file", + "metadata_file", + "configuration_file", + "notebook_file", + "image_file", + "archive_file", + "other" + ], + "title": "FileTypeEnum", + "type": "string" + }, + "FormatDialect": { + "additionalProperties": false, + "description": "Additional format information for a file", + "properties": { + "comment_prefix": { "type": [ "string", "null" @@ -3774,6 +4299,82 @@ "title": "Grantor", "type": "object" }, + "HumanSubjectCompensation": { + "additionalProperties": false, + "description": "Information about compensation or incentives provided to human research participants.", + "properties": { + "compensation_amount": { + "description": "What was the amount or value of compensation provided? 
Include currency or equivalent value.\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "compensation_provided": { + "description": "Were participants compensated for their participation?", + "type": [ + "boolean", + "null" + ] + }, + "compensation_rationale": { + "description": "What was the rationale for the compensation structure? How was the amount determined to be appropriate?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "compensation_type": { + "description": "What type of compensation was provided (e.g., monetary payment, gift cards, course credit, other incentives)?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "description": { + "description": "A human-readable description for this property.", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "An optional identifier for this property.", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "A human-readable name for this property.", + "type": [ + "string", + "null" + ] + }, + "used_software": { + "description": "What software was used as part of this dataset property?", + "items": { + "$ref": "#/$defs/Software" + }, + "type": [ + "array", + "null" + ] + } + }, + "title": "HumanSubjectCompensation", + "type": "object" + }, "HumanSubjectResearch": { "additionalProperties": false, "description": "Information about whether the dataset involves human subjects research and what regulatory or ethical review processes were followed.", @@ -5026,6 +5627,85 @@ "title": "OtherTask", "type": "object" }, + "ParticipantPrivacy": { + "additionalProperties": false, + "description": "Information about privacy protections and anonymization procedures for human research participants.", + "properties": { + "anonymization_method": { + "description": "What methods were used to anonymize or de-identify participant data? 
Include technical details of privacy-preserving techniques.\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "data_linkage": { + "description": "Can this dataset be linked to other datasets in ways that might compromise participant privacy?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "description": { + "description": "A human-readable description for this property.", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "An optional identifier for this property.", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "A human-readable name for this property.", + "type": [ + "string", + "null" + ] + }, + "privacy_techniques": { + "description": "What privacy-preserving techniques were applied (e.g., differential privacy, k-anonymity, data masking)?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "reidentification_risk": { + "description": "What is the assessed risk of re-identification? What measures were taken to minimize this risk?\n", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "used_software": { + "description": "What software was used as part of this dataset property?", + "items": { + "$ref": "#/$defs/Software" + }, + "type": [ + "array", + "null" + ] + } + }, + "title": "ParticipantPrivacy", + "type": "object" + }, "Person": { "additionalProperties": false, "description": "An individual human being. This class represents a person in the context of a specific dataset. Attributes like affiliation and email represent the person's current or most relevant contact information for this dataset. For stable cross-dataset identification, use the ORCID field. 
Note that contributor roles (CRediT) are specified in the usage context (e.g., Creator class) rather than on the Person directly, since roles vary by dataset.", @@ -6237,82 +6917,6 @@ ], "title": "VersionTypeEnum", "type": "string" - }, - "VulnerablePopulations": { - "additionalProperties": false, - "description": "Information about protections for at-risk populations in human subjects research.", - "properties": { - "assent_procedures": { - "description": "For research involving minors, what assent procedures were used? How was developmentally appropriate assent obtained?\n", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "description": { - "description": "A human-readable description for this property.", - "type": [ - "string", - "null" - ] - }, - "guardian_consent": { - "description": "For participants unable to provide their own consent, how was guardian or surrogate consent obtained?\n", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "id": { - "description": "An optional identifier for this property.", - "type": [ - "string", - "null" - ] - }, - "name": { - "description": "A human-readable name for this property.", - "type": [ - "string", - "null" - ] - }, - "special_protections": { - "description": "What additional protections were implemented for at-risk populations? 
Include safeguards, modified procedures, or additional oversight.\n", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "used_software": { - "description": "What software was used as part of this dataset property?", - "items": { - "$ref": "#/$defs/Software" - }, - "type": [ - "array", - "null" - ] - }, - "vulnerable_groups_included": { - "description": "Are any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)?\n", - "type": [ - "boolean", - "null" - ] - } - }, - "title": "VulnerablePopulations", - "type": "object" } }, "$id": "https://w3id.org/bridge2ai/data-sheets-schema", @@ -6444,7 +7048,7 @@ ] }, "resources": { - "description": "Sub-resources or component datasets. Used in DatasetCollection to contain Dataset objects, and in Dataset to allow nested resource structures.", + "description": "Sub-resources or component items. In DatasetCollection, contains Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, contains nested FileCollection objects. 
The specific range is defined via slot_usage in each class.", "items": { "$ref": "#/$defs/Dataset" }, diff --git a/project/owl/data_sheets_schema.owl.ttl b/project/owl/data_sheets_schema.owl.ttl index a4b0e2c9..29506e46 100644 --- a/project/owl/data_sheets_schema.owl.ttl +++ b/project/owl/data_sheets_schema.owl.ttl @@ -15,10 +15,10 @@ data_sheets_schema:DatasetCollection a owl:Class, linkml:ClassDefinition ; rdfs:label "DatasetCollection" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Dataset ; + owl:minCardinality 0 ; owl:onProperty data_sheets_schema:resources ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom data_sheets_schema:Dataset ; owl:onProperty data_sheets_schema:resources ], data_sheets_schema:Information ; skos:altLabel "data resource collection", @@ -34,49 +34,49 @@ data_sheets_schema:FormatDialect a owl:Class, rdfs:label "FormatDialect" ; rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:comment_prefix ], + owl:onProperty data_sheets_schema:delimiter ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:double_quote ], + owl:onProperty data_sheets_schema:quote_char ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty data_sheets_schema:header ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:quote_char ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:double_quote ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:header ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; owl:onProperty data_sheets_schema:delimiter ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:header ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty data_sheets_schema:delimiter ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty 
data_sheets_schema:double_quote ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:quote_char ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; owl:onProperty data_sheets_schema:comment_prefix ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:delimiter ], + owl:onProperty data_sheets_schema:double_quote ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:double_quote ], + owl:onProperty data_sheets_schema:quote_char ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:double_quote ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:comment_prefix ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:quote_char ], + owl:onProperty data_sheets_schema:header ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:quote_char ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:comment_prefix ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:comment_prefix ] ; + owl:onProperty data_sheets_schema:header ] ; skos:definition "Additional format information for a file" ; skos:inScheme data_sheets_schema:base . 
@@ -84,9 +84,6 @@ data_sheets_schema:FormatDialect a owl:Class, linkml:ClassDefinition ; rdfs:label "DirectCollection" ; rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; @@ -95,6 +92,9 @@ data_sheets_schema:FormatDialect a owl:Class, [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty ], @@ -107,10 +107,10 @@ data_sheets_schema:FormatDialect a owl:Class, linkml:ClassDefinition ; rdfs:label "Relationships" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Are relationships between individual instances made explicit (e.g., users' movie ratings, social network links)? @@ -135,13 +135,13 @@ data_sheets_schema:FormatDialect a owl:Class, linkml:ClassDefinition ; rdfs:label "ThirdPartySharing" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:Boolean ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; + owl:maxCardinality 1 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Will the dataset be distributed to third parties outside of the entity (e.g., company, institution, organization) on behalf of which the dataset was created? 
@@ -166,10 +166,10 @@ data_sheets_schema:FormatDialect a owl:Class, linkml:ClassDefinition ; rdfs:label "CollectionNotification" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Were the individuals in question notified about the data collection? If so, please describe (or show with screenshots, etc.) how notice was provided, and reproduce the language of the notification itself if possible. @@ -180,10 +180,10 @@ data_sheets_schema:FormatDialect a owl:Class, linkml:ClassDefinition ; rdfs:label "ConsentRevocation" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """If consent was obtained, were the consenting individuals provided with a mechanism to revoke their consent in the future or for certain uses? If so, please describe. 
@@ -209,43 +209,142 @@ data_sheets_schema:DataSubset a owl:Class, linkml:ClassDefinition ; rdfs:label "DataSubset" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:is_subpopulation ], + owl:allValuesFrom linkml:Boolean ; + owl:onProperty data_sheets_schema:is_data_split ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty data_sheets_schema:is_subpopulation ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty data_sheets_schema:is_data_split ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty data_sheets_schema:is_subpopulation ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; + owl:maxCardinality 1 ; owl:onProperty data_sheets_schema:is_data_split ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:is_data_split ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:is_subpopulation ], data_sheets_schema:Dataset ; skos:definition "A subset of a dataset, likely containing multiple files of multiple potential purposes and properties." ; skos:inScheme . 
-data_sheets_schema:Software a owl:Class, +data_sheets_schema:File a owl:Class, linkml:ClassDefinition ; - rdfs:label "Software" ; + rdfs:label "File" ; rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:version ], + owl:onProperty data_sheets_schema:sha256 ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:dialect ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:dialect ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:FileTypeEnum ; + owl:onProperty data_sheets_schema:file_type ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:format ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:hash ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:sha256 ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:url ], + owl:onProperty data_sheets_schema:sha256 ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:md5 ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:file_type ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Integer ; + owl:onProperty data_sheets_schema:bytes ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:path ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:hash ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:media_type ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:file_type ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:hash ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + 
owl:onProperty data_sheets_schema:encoding ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:format ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:bytes ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:path ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:md5 ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:bytes ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:CompressionEnum ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:media_type ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:EncodingEnum ; + owl:onProperty data_sheets_schema:encoding ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:dialect ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:license ], + owl:onProperty data_sheets_schema:md5 ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:path ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:url ], + owl:onProperty data_sheets_schema:encoding ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:MediaTypeEnum ; + owl:onProperty data_sheets_schema:media_type ], [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:FormatEnum ; + owl:onProperty data_sheets_schema:format ], + data_sheets_schema:Information ; + skos:altLabel "data file", + "file", + "file object" ; + skos:definition "A single file within a dataset or file collection. Represents an individual data file, code file, documentation file, etc. Maps to RO-Crate File entities." 
; + skos:exactMatch schema1:DigitalDocument, + schema1:MediaObject ; + skos:inScheme data_sheets_schema:file-collection . + +data_sheets_schema:Software a owl:Class, + linkml:ClassDefinition ; + rdfs:label "Software" ; + rdfs:subClassOf [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty data_sheets_schema:version ], [ a owl:Restriction ; @@ -253,13 +352,25 @@ data_sheets_schema:Software a owl:Class, owl:onProperty data_sheets_schema:license ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:version ], + owl:onProperty data_sheets_schema:license ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:license ], + owl:onProperty data_sheets_schema:url ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:version ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:version ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty data_sheets_schema:url ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:url ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:license ], data_sheets_schema:NamedThing ; skos:definition "A software program or library." ; skos:exactMatch schema1:SoftwareApplication ; @@ -269,10 +380,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "CollectionMechanism" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """What mechanisms or procedures were used to collect the data (e.g., hardware, manual curation, software APIs)? Also covers how these mechanisms were validated. 
@@ -284,29 +395,29 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "CollectionTimeframe" ; rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Date ; + owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Date ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom linkml:Date ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Date ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Over what timeframe was the data collected, and does this timeframe match the creation timeframe of the underlying data? """ ; @@ -317,9 +428,6 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DataCollector" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; @@ -327,10 +435,13 @@ data_sheets_schema:Software a owl:Class, owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Who was involved in the data collection (e.g., students, crowdworkers, contractors), and how they were compensated. 
""" ; @@ -340,47 +451,47 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "InstanceAcquisition" ; rdfs:subClassOf [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], + [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom linkml:Boolean ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Describes how data associated with each instance was acquired (e.g., directly observed, reported by subjects, inferred). 
""" ; @@ -390,26 +501,26 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "MissingDataDocumentation" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Documentation of missing data in the dataset, including patterns, causes, and strategies for handling missing values. """ ; @@ -420,35 +531,35 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "RawDataSource" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 1 ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:minCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + 
owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Description of raw data sources before preprocessing, cleaning, or labeling. Documents where the original data comes from and how it can be accessed. """ ; @@ -459,19 +570,19 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "Confidentiality" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Does the dataset contain data that might be confidential (e.g., protected by legal privilege, patient data, non-public communications)? @@ -482,20 +593,20 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "ContentWarning" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom linkml:Boolean ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Does the dataset contain any data that might be offensive, insulting, threatening, or otherwise anxiety-provoking if viewed directly? 
""" ; @@ -505,10 +616,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DataAnomaly" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Are there any errors, sources of noise, or redundancies in the dataset? @@ -519,38 +630,38 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DatasetBias" ; rdfs:subClassOf [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom data_sheets_schema:BiasTypeEnum ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Documents known biases present in the dataset. Biases are systematic errors or prejudices that may affect the representativeness or fairness of the data. Distinct from anomalies (data quality issues) and limitations (scope constraints). 
""" ; @@ -561,20 +672,11 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DatasetLimitation" ; rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -582,20 +684,29 @@ data_sheets_schema:Software a owl:Class, owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:LimitationTypeEnum ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:maxCardinality 1 ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:LimitationTypeEnum ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Documents known limitations of the dataset that may affect its use or interpretation. Distinct from biases (systematic errors) and anomalies (data quality issues). 
""" ; @@ -606,31 +717,31 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DatasetRelationship" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 1 ; + owl:allValuesFrom data_sheets_schema:DatasetRelationshipTypeEnum ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:DatasetRelationshipTypeEnum ; + owl:maxCardinality 1 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Typed relationship to another dataset, enabling precise specification of how datasets relate to each other (e.g., supplements, derives from, is version of). Supports RO-Crate-style dataset interlinking. 
@@ -642,68 +753,36 @@ data_sheets_schema:Software a owl:Class, rdfs:label "Deidentification" ; rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - data_sheets_schema:DatasetProperty ; - skos:definition """Is it possible to identify individuals in the dataset, either directly or indirectly (in combination with other data)? -""" ; - skos:inScheme data_sheets_schema:composition . 
- - a owl:Class, - linkml:ClassDefinition ; - rdfs:label "ExternalResource" ; - rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:external_resources ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:external_resources ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; - skos:definition """Is the dataset self-contained or does it rely on external resources (e.g., websites, other datasets)? If external, are there guarantees that those resources will remain available and unchanged? + skos:definition """Is it possible to identify individuals in the dataset, either directly or indirectly (in combination with other data)? """ ; skos:inScheme data_sheets_schema:composition . 
@@ -711,71 +790,71 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "Instance" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:Uriorcurie ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom linkml:Uriorcurie ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Uriorcurie ; - owl:onProperty ], + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Integer ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty ], + owl:allValuesFrom ; + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; 
owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:allValuesFrom linkml:Uriorcurie ; + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """What do the instances that comprise the dataset represent (e.g., documents, photos, people, countries)? """ ; @@ -789,10 +868,10 @@ data_sheets_schema:Software a owl:Class, owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -805,17 +884,17 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "SensitiveElement" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], - [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -829,26 +908,26 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "Subpopulation" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], - [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], + 
owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Does the dataset identify any subpopulations (e.g., by age, gender)? If so, how are they identified and what are their distributions? """ ; @@ -858,43 +937,43 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "ExportControlRegulatoryRestrictions" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Person ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:ComplianceStatusEnum ; - owl:onProperty ], - [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:ConfidentialityLevelEnum ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:ComplianceStatusEnum ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:ConfidentialityLevelEnum ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + 
owl:allValuesFrom data_sheets_schema:Person ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Do any export controls or other regulatory restrictions apply to the dataset or to individual instances? Includes compliance tracking for regulations like HIPAA and other US regulations. If so, please describe these restrictions and provide a link or copy of any supporting documentation. Maps to DUO terms related to ethics approval, geographic restrictions, and institutional requirements. @@ -905,10 +984,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "IPRestrictions" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Have any third parties imposed IP-based or other restrictions on the data associated with the instances? If so, describe them and note any relevant fees or licensing terms. Maps to DUO terms related to commercial/non-profit use restrictions (NCU, NPU, NPUNCU). 
@@ -919,22 +998,22 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "LicenseAndUseTerms" ; rdfs:subClassOf [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], + [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:DataUsePermissionEnum ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom data_sheets_schema:Person ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom data_sheets_schema:DataUsePermissionEnum ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; @@ -948,10 +1027,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "DistributionDate" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """When will the dataset be distributed? 
@@ -993,58 +1072,122 @@ data_sheets_schema:Software a owl:Class, owl:allValuesFrom data_sheets_schema:Organization ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Person ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom data_sheets_schema:Person ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Were any ethical or compliance review processes conducted (e.g., by an institutional review board)? If so, please provide a description of these review processes, including the frequency of review and documentation of outcomes, as well as a link or other access point to any supporting documentation. """ ; skos:inScheme data_sheets_schema:ethics . 
- a owl:Class, + a owl:Class, linkml:ClassDefinition ; - rdfs:label "HumanSubjectResearch" ; + rdfs:label "AtRiskPopulations" ; rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + data_sheets_schema:DatasetProperty ; + skos:definition """Information about protections for at-risk populations in human subjects research. +""" ; + skos:inScheme data_sheets_schema:human . 
+ + a owl:Class, + linkml:ClassDefinition ; + rdfs:label "HumanSubjectCompensation" ; + rdfs:subClassOf [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + data_sheets_schema:DatasetProperty ; + skos:definition """Information about compensation or incentives provided to human research participants. +""" ; + skos:inScheme data_sheets_schema:human . + + a owl:Class, + linkml:ClassDefinition ; + rdfs:label "HumanSubjectResearch" ; + rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -1052,7 +1195,13 @@ data_sheets_schema:Software a owl:Class, owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; 
skos:definition """Information about whether the dataset involves human subjects research and what regulatory or ethical review processes were followed. @@ -1063,75 +1212,72 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "InformedConsent" ; rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Details about informed consent procedures used in human subjects research. """ ; skos:inScheme data_sheets_schema:human . 
- a owl:Class, + a owl:Class, linkml:ClassDefinition ; - rdfs:label "VulnerablePopulations" ; + rdfs:label "ParticipantPrivacy" ; rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; - skos:definition """Information about protections for at-risk populations in human subjects research. + skos:definition """Information about privacy protections and anonymization procedures for human research participants. """ ; skos:inScheme data_sheets_schema:human . @@ -1141,18 +1287,18 @@ data_sheets_schema:Software a owl:Class, rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:Uri ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Is there an erratum? If so, please provide a link or other access point. 
""" ; @@ -1162,6 +1308,9 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "ExtensionMechanism" ; rdfs:subClassOf [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; owl:allValuesFrom linkml:Uri ; owl:onProperty ], [ a owl:Restriction ; @@ -1173,9 +1322,6 @@ data_sheets_schema:Software a owl:Class, [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """If others want to extend/augment/build on/contribute to the dataset, is there a mechanism for them to do so? If so, please describe how those contributions are validated and communicated. """ ; @@ -1185,20 +1331,20 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "Maintainer" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:CreatorOrMaintainerEnum ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:CreatorOrMaintainerEnum ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Who will be supporting/hosting/maintaining the dataset? 
""" ; @@ -1209,7 +1355,7 @@ data_sheets_schema:Software a owl:Class, rdfs:label "RetentionLimits" ; rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], @@ -1218,7 +1364,7 @@ data_sheets_schema:Software a owl:Class, owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -1231,17 +1377,17 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "UpdatePlan" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -1255,25 +1401,25 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "VersionAccess" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Will older 
versions of the dataset continue to be supported/hosted/maintained? If so, how? If not, how will obsolescence be communicated to dataset consumers? @@ -1284,13 +1430,13 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "AddressingGap" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition "Was there a specific gap that needed to be filled by creation of the dataset?" ; @@ -1302,24 +1448,24 @@ data_sheets_schema:Software a owl:Class, rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Organization ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:CRediTRoleEnum ; - owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom data_sheets_schema:Person ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:Organization ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:CRediTRoleEnum ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Who created the dataset (e.g., which team, research group) and on behalf of which entity (e.g., company, institution, organization)? This may also be considered a team. 
""" ; @@ -1332,17 +1478,17 @@ data_sheets_schema:Software a owl:Class, owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Who funded the creation of the dataset? If there is an associated grant, please provide the name of the grantor and the grant name and number. """ ; @@ -1355,10 +1501,10 @@ data_sheets_schema:Software a owl:Class, owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:NamedThing ; skos:definition """The name and/or identifier of the specific mechanism providing monetary support or other resources supporting creation of the dataset. @@ -1396,10 +1542,10 @@ data_sheets_schema:Software a owl:Class, owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition "Was there a specific task in mind for the dataset's application?" 
; @@ -1411,42 +1557,42 @@ data_sheets_schema:Software a owl:Class, rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Float ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Float ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Analysis of annotation quality, inter-annotator agreement metrics, and systematic patterns in annotation disagreements. 
""" ; @@ -1457,10 +1603,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "CleaningStrategy" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Was any cleaning of the data done (e.g., removal of instances, processing of missing values)? @@ -1472,32 +1618,32 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "ImputationProtocol" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Description of data imputation methodology, including techniques used to handle missing values and rationale for chosen approaches. 
""" ; @@ -1508,17 +1654,11 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "LabelingStrategy" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Integer ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], @@ -1527,19 +1667,19 @@ data_sheets_schema:Software a owl:Class, owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:allValuesFrom linkml:Integer ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], @@ -1548,10 +1688,16 @@ data_sheets_schema:Software a owl:Class, owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Was any labeling of the data done (e.g., part-of-speech tagging)? This class documents the annotation process and quality metrics. 
""" ; @@ -1564,20 +1710,20 @@ data_sheets_schema:Software a owl:Class, owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Automated or machine-learning-based annotation tools used in dataset creation, including NLP pipelines, computer vision models, or other automated labeling systems. """ ; @@ -1605,18 +1751,18 @@ data_sheets_schema:Software a owl:Class, rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom linkml:Uri ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Was the "raw" data saved in addition to the preprocessed/cleaned/labeled data? If so, please provide a link or other access point to the "raw" data. 
""" ; @@ -1654,10 +1800,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "FutureUseImpact" ; rdfs:subClassOf [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Is there anything about the dataset's composition or collection that might impact future uses or create risks/harm (e.g., unfair treatment, legal or financial risks)? If so, describe these impacts and any mitigation strategies. @@ -1669,13 +1815,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "IntendedUse" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; @@ -1684,11 +1827,14 @@ data_sheets_schema:Software a owl:Class, owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Explicit statement of intended uses for this dataset. Complements FutureUseImpact by focusing on positive, recommended applications rather than risks. Aligns with RO-Crate "Intended Use" field. 
""" ; @@ -1699,10 +1845,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "OtherTask" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """What other tasks could the dataset be used for? @@ -1713,10 +1859,10 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "ProhibitedUse" ; rdfs:subClassOf [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Explicit statement of prohibited or forbidden uses for this dataset. Stronger than DiscouragedUse - these are uses that are explicitly not permitted by license, ethics, or policy. Aligns with RO-Crate "Prohibited Uses" field. 
@@ -1727,6 +1873,9 @@ data_sheets_schema:Software a owl:Class, linkml:ClassDefinition ; rdfs:label "UseRepository" ; rdfs:subClassOf [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; @@ -1735,9 +1884,6 @@ data_sheets_schema:Software a owl:Class, [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Uri ; owl:onProperty ], @@ -1751,118 +1897,118 @@ data_sheets_schema:Software a owl:Class, rdfs:label "VariableMetadata" ; rdfs:subClassOf [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:allValuesFrom linkml:Integer ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Float ; owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Float ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; owl:onProperty ], 
[ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:VariableTypeEnum ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:minCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Uriorcurie ; owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom data_sheets_schema:VariableTypeEnum ; + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Float ; + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom 
linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], + owl:maxCardinality 1 ; + owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:Integer ; + owl:maxCardinality 1 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition "Metadata describing an individual variable, field, or column in a dataset. Variables may represent measurements, observations, derived values, or categorical attributes." ; skos:exactMatch schema1:PropertyValue ; @@ -1953,6 +2099,16 @@ data_sheets_schema:Software a owl:Class, rdfs:label "no_population_ancestry_research" ; rdfs:subClassOf data_sheets_schema:DataUsePermissionEnum . +schema1:ImageObject a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "image_file" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . + +schema1:SupplementalMaterial a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "supplementary" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + AIO:ConfirmationBias a owl:Class, data_sheets_schema:BiasTypeEnum ; rdfs:label "confirmation_bias" ; @@ -1973,6 +2129,11 @@ AIO:RepresentationBias a owl:Class, rdfs:label "representation_bias" ; rdfs:subClassOf data_sheets_schema:BiasTypeEnum . +data_sheets_schema:ArchiveFile a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "archive_file" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . 
+ a owl:Class, data_sheets_schema:BiasTypeEnum ; rdfs:label "aggregation_bias" ; @@ -2158,6 +2319,11 @@ AIO:RepresentationBias a owl:Class, rdfs:label "unrestricted" ; rdfs:subClassOf data_sheets_schema:ConfidentialityLevelEnum . +data_sheets_schema:ConfigurationFile a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "configuration_file" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . + a owl:Class, data_sheets_schema:CreatorOrMaintainerEnum ; rdfs:label "academic_institution" ; @@ -2453,6 +2619,73 @@ AIO:RepresentationBias a owl:Class, rdfs:label "UTF-8" ; rdfs:subClassOf data_sheets_schema:EncodingEnum . +data_sheets_schema:FileCollection a owl:Class, + linkml:ClassDefinition ; + rdfs:label "FileCollection" ; + rdfs:subClassOf [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:FileCollectionTypeEnum ; + owl:onProperty data_sheets_schema:collection_type ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:total_bytes ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:file_count ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Integer ; + owl:onProperty data_sheets_schema:file_count ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:collection_type ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:external_resources ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:external_resources ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:file_count ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:path ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:path ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty 
data_sheets_schema:total_bytes ], + [ a owl:Restriction ; + owl:allValuesFrom [ owl:intersectionOf ( [ owl:unionOf ( data_sheets_schema:File data_sheets_schema:FileCollection ) ] data_sheets_schema:Dataset ) ] ; + owl:onProperty data_sheets_schema:resources ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:path ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:CompressionEnum ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:resources ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Integer ; + owl:onProperty data_sheets_schema:total_bytes ], + data_sheets_schema:Information ; + skos:altLabel "data files", + "file collection", + "file group" ; + skos:closeMatch dcat:Distribution ; + skos:definition "A collection of files with shared characteristics (format, purpose, structure). Represents a logical grouping of related files within a dataset, such as all training data files, all image files, or all raw data files. Maps to RO-Crate Dataset entities via schema:hasPart relationships." ; + skos:exactMatch schema1:Dataset, + dcat:Dataset ; + skos:inScheme data_sheets_schema:file-collection . + a owl:Class, data_sheets_schema:FormatEnum ; rdfs:label "BZ2" ; @@ -2543,193 +2776,6 @@ AIO:RepresentationBias a owl:Class, rdfs:label "ZIP" ; rdfs:subClassOf data_sheets_schema:FormatEnum . 
-data_sheets_schema:Information a owl:Class, - linkml:ClassDefinition ; - rdfs:label "Information" ; - rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:page ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:was_derived_from ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Uriorcurie ; - owl:onProperty data_sheets_schema:publisher ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Datetime ; - owl:onProperty data_sheets_schema:last_updated_on ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:CompressionEnum ; - owl:onProperty data_sheets_schema:compression ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:was_derived_from ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:compression ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:publisher ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Uri ; - owl:onProperty data_sheets_schema:download_url ], - [ a owl:Restriction ; - owl:allValuesFrom [ a rdfs:Datatype ; - owl:onDatatype xsd:string ; - owl:withRestrictions ( [ xsd:pattern "10\\.\\d{4,}\\/.+" ] ) ] ; - owl:onProperty data_sheets_schema:doi ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:language ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:created_by ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:title ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:license ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:license ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:last_updated_on ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty 
data_sheets_schema:created_on ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:version ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:issued ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:download_url ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:status ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:modified_by ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:language ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:keywords ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:license ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:conforms_to_schema ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:status ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:publisher ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:created_by ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:version ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:last_updated_on ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:conforms_to ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:conforms_to_schema ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:doi ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:compression ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:page ], - [ a owl:Restriction ; - 
owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:modified_by ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:download_url ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:conforms_to ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:was_derived_from ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:title ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Datetime ; - owl:onProperty data_sheets_schema:issued ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:version ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Datetime ; - owl:onProperty data_sheets_schema:created_on ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:status ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:created_on ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:conforms_to_class ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:page ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:modified_by ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:conforms_to_class ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:language ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:issued ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:created_by ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:doi ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:conforms_to_class ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:keywords ], - 
[ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:title ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:conforms_to ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:conforms_to_schema ], - data_sheets_schema:NamedThing ; - skos:closeMatch schema1:CreativeWork ; - skos:definition "Grouping for datasets and data files" ; - skos:inScheme data_sheets_schema:base . - a owl:Class, data_sheets_schema:LimitationTypeEnum ; rdfs:label "coverage_limitation" ; @@ -2860,6 +2906,46 @@ data_sheets_schema:Information a owl:Class, rdfs:label "text/yaml" ; rdfs:subClassOf data_sheets_schema:MediaTypeEnum . +data_sheets_schema:NotebookFile a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "notebook_file" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . + +data_sheets_schema:OtherFile a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "other" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . + +data_sheets_schema:OtherFileCollection a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "other" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + +data_sheets_schema:ProcessedData a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "processed_data" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + +data_sheets_schema:RawData a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "raw_data" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + +data_sheets_schema:TestSplit a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "test_split" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + +data_sheets_schema:TrainingSplit a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "training_split" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . 
+ +data_sheets_schema:ValidationSplit a owl:Class, + data_sheets_schema:FileCollectionTypeEnum ; + rdfs:label "validation_split" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum . + a owl:Class, data_sheets_schema:VariableTypeEnum ; rdfs:label "array" ; @@ -3090,51 +3176,89 @@ data_sheets_schema:collection_timeframes a owl:ObjectProperty, rdfs:label "collection_timeframes" ; skos:inScheme . +data_sheets_schema:collection_type a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "collection_type" ; + skos:definition "Type(s) of content in this file collection. A collection may have multiple types, for example a collection containing both raw_data and documentation files would have both types listed." ; + skos:inScheme data_sheets_schema:file-collection . + + a owl:Class, + linkml:ClassDefinition ; + rdfs:label "ExternalResource" ; + rdfs:subClassOf [ a owl:Restriction ; + owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:external_resources ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:external_resources ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], + data_sheets_schema:DatasetProperty ; + skos:definition """Is the dataset self-contained or does it rely on external resources (e.g., websites, other datasets)? If external, are there guarantees that those resources will remain available and unchanged? +""" ; + skos:inScheme data_sheets_schema:composition . 
+ a owl:Class, linkml:ClassDefinition ; rdfs:label "SamplingStrategy" ; rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; - owl:onProperty ], + owl:onProperty ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; owl:onProperty ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:Boolean ; + owl:onProperty ], + [ a owl:Restriction ; + owl:minCardinality 0 ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty ], + owl:onProperty ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty ], data_sheets_schema:DatasetProperty ; skos:definition """Does the dataset contain all possible instances, or is it a sample (not necessarily random) of instances from a larger set? If so, how representative is it? """ ; @@ -3446,6 +3570,13 @@ data_sheets_schema:existing_uses a owl:ObjectProperty, rdfs:label "existing_uses" ; skos:inScheme . +data_sheets_schema:file_collections a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "file_collections" ; + skos:definition "Collections of files within this dataset. 
Each collection represents a logical grouping of files with shared characteristics (e.g., all training data, all image files, all raw data files). Maps to nested RO-Crate Dataset entities via schema:hasPart." ; + skos:exactMatch dcat:distribution ; + skos:inScheme . + data_sheets_schema:funders a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "funders" ; @@ -3456,6 +3587,13 @@ data_sheets_schema:future_use_impacts a owl:ObjectProperty, rdfs:label "future_use_impacts" ; skos:inScheme . + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "anonymization_method" ; + skos:definition """What methods were used to anonymize or de-identify participant data? Include technical details of privacy-preserving techniques. +""" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "assent_procedures" ; @@ -3463,6 +3601,27 @@ data_sheets_schema:future_use_impacts a owl:ObjectProperty, """ ; skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "compensation_amount" ; + skos:definition """What was the amount or value of compensation provided? Include currency or equivalent value. +""" ; + skos:inScheme data_sheets_schema:human . + + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "compensation_rationale" ; + skos:definition """What was the rationale for the compensation structure? How was the amount determined to be appropriate? +""" ; + skos:inScheme data_sheets_schema:human . + + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "compensation_type" ; + skos:definition """What type of compensation was provided (e.g., monetary payment, gift cards, course credit, other incentives)? +""" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "consent_documentation" ; @@ -3484,6 +3643,13 @@ data_sheets_schema:future_use_impacts a owl:ObjectProperty, """ ; skos:inScheme data_sheets_schema:human . 
+ a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "data_linkage" ; + skos:definition """Can this dataset be linked to other datasets in ways that might compromise participant privacy? +""" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "ethics_review_board" ; @@ -3505,6 +3671,13 @@ data_sheets_schema:future_use_impacts a owl:ObjectProperty, """ ; skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "privacy_techniques" ; + skos:definition """What privacy-preserving techniques were applied (e.g., differential privacy, k-anonymity, data masking)? +""" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "regulatory_compliance" ; @@ -3512,6 +3685,13 @@ data_sheets_schema:future_use_impacts a owl:ObjectProperty, """ ; skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "reidentification_risk" ; + skos:definition """What is the assessed risk of re-identification? What measures were taken to minimize this risk? +""" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "special_populations" ; @@ -3673,6 +3853,18 @@ data_sheets_schema:parent_datasets a owl:ObjectProperty, skos:exactMatch schema1:isPartOf ; skos:inScheme . +data_sheets_schema:participant_compensation a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "participant_compensation" ; + skos:definition "Information about compensation or incentives provided to human research participants." ; + skos:inScheme . + +data_sheets_schema:participant_privacy a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "participant_privacy" ; + skos:definition "Information about privacy protections and anonymization procedures for human research participants." ; + skos:inScheme . 
+ a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "annotation_quality_details" ; @@ -3930,6 +4122,17 @@ data_sheets_schema:used_software a owl:ObjectProperty, rdfs:label "no_commercial_use" ; rdfs:subClassOf data_sheets_schema:DataUsePermissionEnum . +schema1:DataDownload a owl:Class, + data_sheets_schema:FileTypeEnum ; + rdfs:label "data_file" ; + rdfs:subClassOf data_sheets_schema:FileTypeEnum . + +data_sheets_schema:at_risk_populations a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "at_risk_populations" ; + skos:definition "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." ; + skos:inScheme . + data_sheets_schema:bytes a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "bytes" ; @@ -4163,13 +4366,6 @@ data_sheets_schema:comment_prefix a owl:ObjectProperty, skos:definition "The dataset that this relationship points to. Can be specified by identifier, URL, or Dataset object." ; skos:inScheme data_sheets_schema:composition . -data_sheets_schema:compression a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "compression" ; - rdfs:range data_sheets_schema:CompressionEnum ; - skos:definition "compression format used, if any. e.g., gzip, bzip2, zip" ; - skos:inScheme data_sheets_schema:base . - data_sheets_schema:conforms_to a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "conforms_to" ; @@ -4230,6 +4426,7 @@ data_sheets_schema:delimiter a owl:ObjectProperty, data_sheets_schema:dialect a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "dialect" ; + skos:definition "Specific format dialect or variation (e.g., CSV dialect, JSON-LD profile)." ; skos:inScheme data_sheets_schema:base . a owl:ObjectProperty, @@ -4293,6 +4490,18 @@ data_sheets_schema:extension_mechanism a owl:ObjectProperty, rdfs:label "extension_mechanism" ; skos:inScheme . 
+data_sheets_schema:file_count a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "file_count" ; + skos:definition "Number of files in this collection." ; + skos:inScheme data_sheets_schema:file-collection . + +data_sheets_schema:file_type a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "file_type" ; + skos:definition "Semantic type or purpose of this file (e.g., data_file, code_file, documentation_file, metadata_file)." ; + skos:inScheme data_sheets_schema:file-collection . + data_sheets_schema:format a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "format" ; @@ -4311,6 +4520,19 @@ data_sheets_schema:header a owl:ObjectProperty, rdfs:label "header" ; skos:inScheme data_sheets_schema:base . + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "at_risk_groups_included" ; + skos:definition """Are any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)? +""" ; + skos:inScheme data_sheets_schema:human . + + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "compensation_provided" ; + skos:definition "Were participants compensated for their participation?" ; + skos:inScheme data_sheets_schema:human . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "consent_obtained" ; @@ -4323,13 +4545,6 @@ data_sheets_schema:header a owl:ObjectProperty, skos:definition "Does this dataset involve human subjects research?" ; skos:inScheme data_sheets_schema:human . - a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "vulnerable_groups_included" ; - skos:definition """Are any at-risk populations included (e.g., children, pregnant women, prisoners, cognitively impaired individuals)? -""" ; - skos:inScheme data_sheets_schema:human . 
- data_sheets_schema:human_subject_research a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "human_subject_research" ; @@ -4474,11 +4689,6 @@ data_sheets_schema:page a owl:ObjectProperty, rdfs:label "page" ; skos:inScheme data_sheets_schema:base . -data_sheets_schema:path a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "path" ; - skos:inScheme data_sheets_schema:base . - a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "access_url" ; @@ -4571,6 +4781,24 @@ data_sheets_schema:title a owl:ObjectProperty, skos:definition "the official title of the element" ; skos:inScheme data_sheets_schema:base . +data_sheets_schema:total_bytes a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "total_bytes" ; + skos:definition "Total size of all files in bytes." ; + skos:inScheme data_sheets_schema:file-collection . + +data_sheets_schema:total_file_count a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "total_file_count" ; + skos:definition "Total number of files across all file collections in this dataset. Can be aggregated from file_collections[].file_count." ; + skos:inScheme . + +data_sheets_schema:total_size_bytes a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "total_size_bytes" ; + skos:definition "Total size of all files in bytes across all file collections. Can be aggregated from file_collections[].total_bytes." ; + skos:inScheme . + data_sheets_schema:updates a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "updates" ; @@ -4662,551 +4890,743 @@ data_sheets_schema:version_access a owl:ObjectProperty, rdfs:label "version_access" ; skos:inScheme . -data_sheets_schema:vulnerable_populations a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "vulnerable_populations" ; - skos:definition "Information about protections for at-risk populations (e.g., minors, pregnant women, prisoners) including special safeguards and assent procedures." ; - skos:inScheme . 
- data_sheets_schema:was_derived_from a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "was_derived_from" ; skos:exactMatch dcterms:source ; skos:inScheme data_sheets_schema:base . -data_sheets_schema:Organization a owl:Class, - linkml:ClassDefinition ; - rdfs:label "Organization" ; - rdfs:subClassOf data_sheets_schema:NamedThing ; - skos:definition "Represents a group or organization." ; - skos:exactMatch schema1:Organization ; - skos:inScheme data_sheets_schema:base . +schema1:Documentation a owl:Class, + data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum ; + rdfs:label "documentation", + "documentation_file" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum . + +schema1:SoftwareSourceCode a owl:Class, + data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum ; + rdfs:label "code", + "code_file" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum . + +dcat:CatalogRecord a owl:Class, + data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum ; + rdfs:label "metadata", + "metadata_file" ; + rdfs:subClassOf data_sheets_schema:FileCollectionTypeEnum, + data_sheets_schema:FileTypeEnum . 
-data_sheets_schema:Person a owl:Class, +data_sheets_schema:Information a owl:Class, linkml:ClassDefinition ; - rdfs:label "Person" ; + rdfs:label "Information" ; rdfs:subClassOf [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:orcid ], + owl:onProperty data_sheets_schema:publisher ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:orcid ], + owl:onProperty data_sheets_schema:issued ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:affiliation ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:page ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:CompressionEnum ; + owl:onProperty data_sheets_schema:compression ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:email ], + owl:onProperty data_sheets_schema:version ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Datetime ; + owl:onProperty data_sheets_schema:last_updated_on ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:modified_by ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:email ], + owl:onProperty data_sheets_schema:conforms_to_class ], [ a owl:Restriction ; - owl:allValuesFrom [ a rdfs:Datatype ; - owl:intersectionOf ( linkml:String [ a rdfs:Datatype ; - owl:onDatatype xsd:string ; - owl:withRestrictions ( [ xsd:pattern "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[0-9X]$" ] ) ] ) ] ; - owl:onProperty data_sheets_schema:orcid ], + owl:allValuesFrom linkml:Uri ; + owl:onProperty data_sheets_schema:download_url ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Organization ; - owl:onProperty data_sheets_schema:affiliation ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:modified_by ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:email ], - data_sheets_schema:NamedThing ; - skos:definition 
"An individual human being. This class represents a person in the context of a specific dataset. Attributes like affiliation and email represent the person's current or most relevant contact information for this dataset. For stable cross-dataset identification, use the ORCID field. Note that contributor roles (CRediT) are specified in the usage context (e.g., Creator class) rather than on the Person directly, since roles vary by dataset." ; - skos:exactMatch schema1:Person ; - skos:inScheme data_sheets_schema:base . - -data_sheets_schema:external_resources a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "external_resources" ; - skos:definition "Links or identifiers for external resources. Can be used either as a list of ExternalResource objects (in Dataset) or as a list of URL strings (within ExternalResource class)." ; - skos:inScheme data_sheets_schema:base . - -data_sheets_schema:resources a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "resources" ; - rdfs:range data_sheets_schema:Dataset ; - skos:definition "Sub-resources or component datasets. Used in DatasetCollection to contain Dataset objects, and in Dataset to allow nested resource structures." ; - skos:inScheme data_sheets_schema:base . - - a owl:ObjectProperty, - linkml:SlotDefinition . 
- -data_sheets_schema:Dataset a owl:Class, - linkml:ClassDefinition ; - rdfs:label "Dataset" ; - rdfs:subClassOf [ a owl:Restriction ; + owl:onProperty data_sheets_schema:language ], + [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:version_access ], + owl:onProperty data_sheets_schema:status ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:raw_data_sources ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:title ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:language ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:created_on ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:created_by ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:known_limitations ], + owl:onProperty data_sheets_schema:language ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:human_subject_research ], + owl:onProperty data_sheets_schema:was_derived_from ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:doi ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:created_on ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:path ], + owl:onProperty data_sheets_schema:conforms_to_class ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:anomalies ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:last_updated_on ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:known_biases ], + owl:allValuesFrom linkml:Datetime ; + owl:onProperty data_sheets_schema:issued ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:license_and_use_terms ], + owl:onProperty data_sheets_schema:keywords ], + [ a owl:Restriction 
; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:keywords ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:license ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:conforms_to_schema ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:compression ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:conforms_to_schema ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:conforms_to ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:existing_uses ], + owl:onProperty data_sheets_schema:download_url ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:external_resources ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:status ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:known_limitations ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:conforms_to ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:sampling_strategies ], + owl:onProperty data_sheets_schema:compression ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:preprocessing_strategies ], + owl:allValuesFrom [ a rdfs:Datatype ; + owl:onDatatype xsd:string ; + owl:withRestrictions ( [ xsd:pattern "10\\.\\d{4,}\\/.+" ] ) ] ; + owl:onProperty data_sheets_schema:doi ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:sampling_strategies ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:page ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:retention_limit ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:was_derived_from ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty 
data_sheets_schema:download_url ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:instances ], + owl:onProperty data_sheets_schema:version ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:prohibited_uses ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:license ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:title ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:bytes ], + owl:onProperty data_sheets_schema:created_by ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:collection_timeframes ], + owl:onProperty data_sheets_schema:license ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:annotation_analyses ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:page ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:version ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Datetime ; + owl:onProperty data_sheets_schema:created_on ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:funders ], + owl:onProperty data_sheets_schema:title ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:subpopulations ], + owl:onProperty data_sheets_schema:conforms_to_schema ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:vulnerable_populations ], + owl:onProperty data_sheets_schema:conforms_to_class ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:hash ], + owl:onProperty data_sheets_schema:was_derived_from ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Dataset ; - owl:onProperty data_sheets_schema:parent_datasets ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:status ], [ a owl:Restriction ; - owl:allValuesFrom ; - 
owl:onProperty data_sheets_schema:variables ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:modified_by ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Uriorcurie ; + owl:onProperty data_sheets_schema:publisher ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:cleaning_strategies ], + owl:onProperty data_sheets_schema:conforms_to ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:created_by ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:discouraged_uses ], + owl:onProperty data_sheets_schema:issued ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:publisher ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:addressing_gaps ], + owl:onProperty data_sheets_schema:last_updated_on ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:doi ], + data_sheets_schema:NamedThing ; + skos:closeMatch schema1:CreativeWork ; + skos:definition "Grouping for datasets and data files" ; + skos:inScheme data_sheets_schema:base . + +data_sheets_schema:Organization a owl:Class, + linkml:ClassDefinition ; + rdfs:label "Organization" ; + rdfs:subClassOf data_sheets_schema:NamedThing ; + skos:definition "Represents a group or organization." ; + skos:exactMatch schema1:Organization ; + skos:inScheme data_sheets_schema:base . 
+ +data_sheets_schema:Person a owl:Class, + linkml:ClassDefinition ; + rdfs:label "Person" ; + rdfs:subClassOf [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:email ], + [ a owl:Restriction ; + owl:allValuesFrom [ a rdfs:Datatype ; + owl:intersectionOf ( linkml:String [ a rdfs:Datatype ; + owl:onDatatype xsd:string ; + owl:withRestrictions ( [ xsd:pattern "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[0-9X]$" ] ) ] ) ] ; + owl:onProperty data_sheets_schema:orcid ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:regulatory_restrictions ], + owl:onProperty data_sheets_schema:email ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:data_collectors ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:orcid ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:existing_uses ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:orcid ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:regulatory_restrictions ], + owl:onProperty data_sheets_schema:affiliation ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:addressing_gaps ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:email ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:collection_mechanisms ], + owl:allValuesFrom data_sheets_schema:Organization ; + owl:onProperty data_sheets_schema:affiliation ], + data_sheets_schema:NamedThing ; + skos:definition "An individual human being. This class represents a person in the context of a specific dataset. Attributes like affiliation and email represent the person's current or most relevant contact information for this dataset. For stable cross-dataset identification, use the ORCID field. 
Note that contributor roles (CRediT) are specified in the usage context (e.g., Creator class) rather than on the Person directly, since roles vary by dataset." ; + skos:exactMatch schema1:Person ; + skos:inScheme data_sheets_schema:base . + + a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "examples" ; + skos:definition "List of example intended uses for this dataset.", + "List of examples of known/previous uses of the dataset." ; + skos:inScheme data_sheets_schema:uses . + +data_sheets_schema:NamedThing a owl:Class, + linkml:ClassDefinition ; + rdfs:label "NamedThing" ; + rdfs:subClassOf [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:name ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:labeling_strategies ], + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:description ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:sha256 ], + owl:onProperty data_sheets_schema:id ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:machine_annotation_tools ], + owl:onProperty data_sheets_schema:name ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:future_use_impacts ], + owl:minCardinality 1 ; + owl:onProperty data_sheets_schema:id ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:data_protection_impacts ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:description ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:DataSubset ; - owl:onProperty data_sheets_schema:subsets ], + owl:allValuesFrom linkml:Uriorcurie ; + owl:onProperty data_sheets_schema:id ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:confidential_elements ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:name ], [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty 
data_sheets_schema:description ] ; + skos:definition "A generic grouping for any identifiable entity." ; + skos:exactMatch schema1:Thing ; + skos:inScheme data_sheets_schema:base . + +data_sheets_schema:Boolean a owl:Class, + linkml:EnumDefinition ; + owl:unionOf ( ) ; + linkml:permissible_values , + , + . + +data_sheets_schema:Dataset a owl:Class, + linkml:ClassDefinition ; + rdfs:label "Dataset" ; + rdfs:subClassOf [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:related_datasets ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:extension_mechanism ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:raw_sources ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:ip_restrictions ], + owl:onProperty data_sheets_schema:parent_datasets ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:errata ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:encoding ], + owl:onProperty data_sheets_schema:resources ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:citation ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:retention_limit ], + owl:onProperty data_sheets_schema:related_datasets ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:purposes ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:ip_restrictions ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:use_repository ], + owl:onProperty data_sheets_schema:file_collections ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:dialect ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:updates ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:content_warnings ], + owl:onProperty 
data_sheets_schema:participant_privacy ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:data_collectors ], + [ a owl:Restriction ; + owl:allValuesFrom ; owl:onProperty data_sheets_schema:collection_mechanisms ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty data_sheets_schema:raw_sources ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:is_tabular ], + owl:onProperty data_sheets_schema:errata ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:retention_limit ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Integer ; + owl:onProperty data_sheets_schema:total_file_count ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:is_deidentified ], + [ a owl:Restriction ; + owl:allValuesFrom ; owl:onProperty data_sheets_schema:distribution_dates ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:license_and_use_terms ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:use_repository ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:bytes ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:maintainers ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:collection_timeframes ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:confidential_elements ], [ a owl:Restriction ; owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:cleaning_strategies ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:instances ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:existing_uses ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:content_warnings ], + [ a owl:Restriction ; + owl:allValuesFrom ; owl:onProperty data_sheets_schema:informed_consent ], [ a owl:Restriction ; - owl:allValuesFrom ; - 
owl:onProperty data_sheets_schema:missing_data_documentation ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:ethical_reviews ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:labeling_strategies ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:path ], + owl:onProperty data_sheets_schema:at_risk_populations ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:subpopulations ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:regulatory_restrictions ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:version_access ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:preprocessing_strategies ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:instances ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:participant_compensation ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:creators ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:ethical_reviews ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:cleaning_strategies ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:creators ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:FileCollection ; + owl:onProperty data_sheets_schema:file_collections ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:missing_data_documentation ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:sensitive_elements ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:tasks ], + owl:onProperty data_sheets_schema:sensitive_elements ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:funders ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty 
data_sheets_schema:distribution_formats ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:citation ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:use_repository ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:discouraged_uses ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:existing_uses ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:raw_data_sources ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:preprocessing_strategies ], + owl:onProperty data_sheets_schema:collection_timeframes ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:Boolean ; + owl:onProperty data_sheets_schema:is_tabular ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:distribution_dates ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:subpopulations ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:confidential_elements ], + [ a owl:Restriction ; + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:citation ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:intended_uses ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:raw_sources ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:hash ], + owl:onProperty data_sheets_schema:version_access ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:preprocessing_strategies ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:extension_mechanism ], + owl:onProperty data_sheets_schema:known_biases ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:license_and_use_terms ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:purposes ], + 
owl:onProperty data_sheets_schema:addressing_gaps ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:prohibited_uses ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:intended_uses ], + owl:onProperty data_sheets_schema:discouraged_uses ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:human_subject_research ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:vulnerable_populations ], + owl:onProperty data_sheets_schema:imputation_protocols ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:md5 ], + owl:onProperty data_sheets_schema:external_resources ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:other_tasks ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:missing_data_documentation ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:EncodingEnum ; - owl:onProperty data_sheets_schema:encoding ], + owl:onProperty data_sheets_schema:extension_mechanism ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:human_subject_research ], + owl:onProperty data_sheets_schema:tasks ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:is_tabular ], + owl:onProperty data_sheets_schema:ip_restrictions ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:human_subject_research ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:addressing_gaps ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:dialect ], + owl:onProperty data_sheets_schema:at_risk_populations ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:encoding ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:future_use_impacts ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty 
data_sheets_schema:raw_data_sources ], + owl:onProperty data_sheets_schema:missing_data_documentation ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:extension_mechanism ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:discouraged_uses ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:updates ], + owl:onProperty data_sheets_schema:is_tabular ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:extension_mechanism ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:license_and_use_terms ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:version_access ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:acquisition_methods ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:sampling_strategies ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:data_protection_impacts ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:cleaning_strategies ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:annotation_analyses ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:media_type ], + owl:onProperty data_sheets_schema:data_protection_impacts ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:prohibited_uses ], + owl:onProperty data_sheets_schema:intended_uses ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:labeling_strategies ], + owl:onProperty data_sheets_schema:anomalies ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:tasks ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:is_deidentified ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:citation ], + 
owl:allValuesFrom ; + owl:onProperty data_sheets_schema:human_subject_research ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:md5 ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:total_size_bytes ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:resources ], + owl:onProperty data_sheets_schema:acquisition_methods ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:instances ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:known_biases ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:format ], + owl:onProperty data_sheets_schema:is_tabular ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:other_tasks ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:updates ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:sha256 ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:hash ], + owl:onProperty data_sheets_schema:purposes ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:confidential_elements ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:license_and_use_terms ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:informed_consent ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:known_limitations ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:sensitive_elements ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:at_risk_populations ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:sha256 ], + owl:onProperty data_sheets_schema:annotation_analyses ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:known_biases ], + owl:allValuesFrom ; + 
owl:onProperty data_sheets_schema:imputation_protocols ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:FormatEnum ; - owl:onProperty data_sheets_schema:format ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:participant_privacy ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:is_deidentified ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:version_access ], + owl:onProperty data_sheets_schema:informed_consent ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:distribution_dates ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:labeling_strategies ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:creators ], + owl:onProperty data_sheets_schema:machine_annotation_tools ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:subsets ], + owl:onProperty data_sheets_schema:raw_data_sources ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:maintainers ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:imputation_protocols ], + owl:onProperty data_sheets_schema:total_file_count ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:dialect ], + owl:onProperty data_sheets_schema:extension_mechanism ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:anomalies ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:data_collectors ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:external_resources ], [ a owl:Restriction ; owl:allValuesFrom data_sheets_schema:Dataset ; owl:onProperty data_sheets_schema:resources ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:content_warnings ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; + owl:onProperty 
data_sheets_schema:retention_limit ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:external_resources ], + owl:onProperty data_sheets_schema:variables ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:is_deidentified ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:regulatory_restrictions ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:maintainers ], + owl:minCardinality 0 ; + owl:onProperty data_sheets_schema:known_limitations ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:imputation_protocols ], + owl:onProperty data_sheets_schema:collection_mechanisms ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:regulatory_restrictions ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:tasks ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:retention_limit ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:anomalies ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:content_warnings ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:funders ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:errata ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:variables ], + [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:DataSubset ; + owl:onProperty data_sheets_schema:subsets ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:parent_datasets ], + owl:onProperty data_sheets_schema:subsets ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:data_protection_impacts ], + owl:onProperty data_sheets_schema:human_subject_research ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:is_deidentified ], - [ a owl:Restriction ; - owl:allValuesFrom ; - 
owl:onProperty data_sheets_schema:funders ], + owl:onProperty data_sheets_schema:total_size_bytes ], [ a owl:Restriction ; owl:allValuesFrom ; owl:onProperty data_sheets_schema:updates ], [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:MediaTypeEnum ; - owl:onProperty data_sheets_schema:media_type ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:retention_limit ], [ a owl:Restriction ; - owl:minCardinality 0 ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:subpopulations ], + [ a owl:Restriction ; + owl:maxCardinality 1 ; owl:onProperty data_sheets_schema:citation ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:path ], + owl:onProperty data_sheets_schema:regulatory_restrictions ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:future_use_impacts ], + owl:onProperty data_sheets_schema:sampling_strategies ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:variables ], + owl:onProperty data_sheets_schema:participant_compensation ], + [ a owl:Restriction ; + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:purposes ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:ethical_reviews ], + owl:onProperty data_sheets_schema:distribution_formats ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:ip_restrictions ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:maintainers ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:license_and_use_terms ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:md5 ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:acquisition_methods ], + owl:onProperty data_sheets_schema:total_file_count ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:creators ], + owl:allValuesFrom 
data_sheets_schema:Dataset ; + owl:onProperty data_sheets_schema:parent_datasets ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:format ], + owl:onProperty data_sheets_schema:prohibited_uses ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:distribution_formats ], + owl:onProperty data_sheets_schema:version_access ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:allValuesFrom ; owl:onProperty data_sheets_schema:ip_restrictions ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:related_datasets ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:vulnerable_populations ], - [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:machine_annotation_tools ], + owl:onProperty data_sheets_schema:other_tasks ], [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:media_type ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:future_use_impacts ], [ a owl:Restriction ; - owl:allValuesFrom ; - owl:onProperty data_sheets_schema:use_repository ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:collection_timeframes ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:data_collectors ], + owl:onProperty data_sheets_schema:updates ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:acquisition_methods ], + owl:maxCardinality 1 ; + owl:onProperty data_sheets_schema:is_deidentified ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:annotation_analyses ], + owl:allValuesFrom ; + owl:onProperty data_sheets_schema:machine_annotation_tools ], [ a owl:Restriction ; owl:allValuesFrom linkml:Integer ; - owl:onProperty data_sheets_schema:bytes ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Boolean ; - owl:onProperty data_sheets_schema:is_tabular ], + owl:onProperty 
data_sheets_schema:total_size_bytes ], data_sheets_schema:Information ; skos:altLabel "data file", "data package", @@ -5216,47 +5636,6 @@ data_sheets_schema:Dataset a owl:Class, dcat:Distribution ; skos:inScheme . -data_sheets_schema:NamedThing a owl:Class, - linkml:ClassDefinition ; - rdfs:label "NamedThing" ; - rdfs:subClassOf [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:name ], - [ a owl:Restriction ; - owl:minCardinality 1 ; - owl:onProperty data_sheets_schema:id ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:description ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:description ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:id ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:name ], - [ a owl:Restriction ; - owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:description ], - [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:name ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Uriorcurie ; - owl:onProperty data_sheets_schema:id ] ; - skos:definition "A generic grouping for any identifiable entity." ; - skos:exactMatch schema1:Thing ; - skos:inScheme data_sheets_schema:base . - -data_sheets_schema:Boolean a owl:Class, - linkml:EnumDefinition ; - owl:unionOf ( ) ; - linkml:permissible_values , - , - . - data_sheets_schema:description a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "description" ; @@ -5264,6 +5643,12 @@ data_sheets_schema:description a owl:ObjectProperty, "A human-readable description for this property." ; skos:inScheme data_sheets_schema:base . +data_sheets_schema:external_resources a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "external_resources" ; + skos:definition "Links or identifiers for external resources. 
Can be used either as a list of ExternalResource objects (in Dataset) or as a list of URL strings (within ExternalResource class)." ; + skos:inScheme data_sheets_schema:base . + data_sheets_schema:id a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "id" ; @@ -5283,6 +5668,18 @@ data_sheets_schema:name a owl:ObjectProperty, "A human-readable name for this property." ; skos:inScheme data_sheets_schema:base . +data_sheets_schema:path a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "path" ; + skos:inScheme data_sheets_schema:base . + +data_sheets_schema:resources a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "resources" ; + rdfs:range data_sheets_schema:Dataset ; + skos:definition "Sub-resources or component items. In DatasetCollection, contains Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, contains nested FileCollection objects. The specific range is defined via slot_usage in each class." ; + skos:inScheme data_sheets_schema:base . + data_sheets_schema:version a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "version" ; @@ -5295,6 +5692,13 @@ data_sheets_schema:ConfidentialityLevelEnum a owl:Class, , . +data_sheets_schema:compression a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "compression" ; + rdfs:range data_sheets_schema:CompressionEnum ; + skos:definition "compression format used, if any. e.g., gzip, bzip2, zip" ; + skos:inScheme data_sheets_schema:base . + a owl:ObjectProperty, linkml:SlotDefinition ; rdfs:label "response" ; @@ -5323,6 +5727,13 @@ data_sheets_schema:LimitationTypeEnum a owl:Class, , . +data_sheets_schema:variables a owl:ObjectProperty, + linkml:SlotDefinition ; + rdfs:label "variables" ; + skos:definition "Metadata describing individual variables, fields, or columns in the dataset." ; + skos:exactMatch schema1:variableMeasured ; + skos:inScheme . 
+ data_sheets_schema:CompressionEnum a owl:Class, linkml:EnumDefinition ; owl:unionOf ( ) ; @@ -5334,13 +5745,6 @@ data_sheets_schema:CompressionEnum a owl:Class, , . -data_sheets_schema:variables a owl:ObjectProperty, - linkml:SlotDefinition ; - rdfs:label "variables" ; - skos:definition "Metadata describing individual variables, fields, or columns in the dataset." ; - skos:exactMatch schema1:variableMeasured ; - skos:inScheme . - data_sheets_schema:BiasTypeEnum a owl:Class, linkml:EnumDefinition ; owl:unionOf ( AIO:MeasurementBias AIO:HistoricalBias AIO:RepresentationBias AIO:ConfirmationBias ) ; @@ -5354,6 +5758,33 @@ data_sheets_schema:BiasTypeEnum a owl:Class, , . +data_sheets_schema:FileTypeEnum a owl:Class, + linkml:EnumDefinition ; + owl:unionOf ( schema1:DataDownload schema1:SoftwareSourceCode schema1:Documentation dcat:CatalogRecord data_sheets_schema:ConfigurationFile data_sheets_schema:NotebookFile schema1:ImageObject data_sheets_schema:ArchiveFile data_sheets_schema:OtherFile ) ; + linkml:permissible_values schema1:DataDownload, + schema1:Documentation, + schema1:ImageObject, + schema1:SoftwareSourceCode, + dcat:CatalogRecord, + data_sheets_schema:ArchiveFile, + data_sheets_schema:ConfigurationFile, + data_sheets_schema:NotebookFile, + data_sheets_schema:OtherFile . 
+ +data_sheets_schema:FileCollectionTypeEnum a owl:Class, + linkml:EnumDefinition ; + owl:unionOf ( data_sheets_schema:RawData data_sheets_schema:ProcessedData data_sheets_schema:TrainingSplit data_sheets_schema:TestSplit data_sheets_schema:ValidationSplit schema1:Documentation dcat:CatalogRecord schema1:SoftwareSourceCode schema1:SupplementalMaterial data_sheets_schema:OtherFileCollection ) ; + linkml:permissible_values schema1:Documentation, + schema1:SoftwareSourceCode, + schema1:SupplementalMaterial, + dcat:CatalogRecord, + data_sheets_schema:OtherFileCollection, + data_sheets_schema:ProcessedData, + data_sheets_schema:RawData, + data_sheets_schema:TestSplit, + data_sheets_schema:TrainingSplit, + data_sheets_schema:ValidationSplit . + data_sheets_schema:CreatorOrMaintainerEnum a owl:Class, linkml:EnumDefinition ; owl:unionOf ( ) ; @@ -5513,38 +5944,38 @@ data_sheets_schema:DatasetProperty a owl:Class, linkml:ClassDefinition ; rdfs:label "DatasetProperty" ; rdfs:subClassOf [ a owl:Restriction ; + owl:allValuesFrom data_sheets_schema:Software ; + owl:onProperty data_sheets_schema:used_software ], + [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:id ], + owl:onProperty data_sheets_schema:name ], + [ a owl:Restriction ; + owl:allValuesFrom linkml:String ; + owl:onProperty data_sheets_schema:name ], [ a owl:Restriction ; owl:minCardinality 0 ; owl:onProperty data_sheets_schema:used_software ], [ a owl:Restriction ; owl:maxCardinality 1 ; - owl:onProperty data_sheets_schema:description ], - [ a owl:Restriction ; - owl:allValuesFrom linkml:Uriorcurie ; owl:onProperty data_sheets_schema:id ], [ a owl:Restriction ; - owl:allValuesFrom linkml:String ; + owl:maxCardinality 1 ; owl:onProperty data_sheets_schema:description ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:id ], - [ a owl:Restriction ; - owl:allValuesFrom data_sheets_schema:Software ; - owl:onProperty data_sheets_schema:used_software ], + 
owl:onProperty data_sheets_schema:description ], [ a owl:Restriction ; owl:allValuesFrom linkml:String ; - owl:onProperty data_sheets_schema:name ], + owl:onProperty data_sheets_schema:description ], [ a owl:Restriction ; owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:description ], + owl:onProperty data_sheets_schema:id ], [ a owl:Restriction ; - owl:maxCardinality 1 ; + owl:minCardinality 0 ; owl:onProperty data_sheets_schema:name ], [ a owl:Restriction ; - owl:minCardinality 0 ; - owl:onProperty data_sheets_schema:name ] ; + owl:allValuesFrom linkml:Uriorcurie ; + owl:onProperty data_sheets_schema:id ] ; skos:definition "Represents a single property of a dataset, or a set of related properties." ; skos:inScheme data_sheets_schema:base . diff --git a/src/data_sheets_schema/datamodel/data_sheets_schema.py b/src/data_sheets_schema/datamodel/data_sheets_schema.py index 5df004ee..d61c40f9 100644 --- a/src/data_sheets_schema/datamodel/data_sheets_schema.py +++ b/src/data_sheets_schema/datamodel/data_sheets_schema.py @@ -1,5 +1,5 @@ # Auto generated from data_sheets_schema.yaml by pythongen.py version: 0.0.1 -# Generation date: 2026-03-09T13:05:10 +# Generation date: 2026-04-06T21:13:18 # Schema: data-sheets-schema # # id: https://w3id.org/bridge2ai/data-sheets-schema @@ -69,6 +69,7 @@ B2AI_TOPIC = CurieNamespace('B2AI_TOPIC', 'https://w3id.org/bridge2ai/b2ai-standards-registry/') DUO = CurieNamespace('DUO', 'http://purl.obolibrary.org/obo/DUO_') BIOLINK = CurieNamespace('biolink', 'https://w3id.org/biolink/vocab/') +D4D = CurieNamespace('d4d', 'https://w3id.org/bridge2ai/data-sheets-schema/') D4DCOMPOSITION = CurieNamespace('d4dcomposition', 'https://w3id.org/bridge2ai/data-sheets-schema/composition#') D4DDATAGOVERNANCE = CurieNamespace('d4ddatagovernance', 'https://w3id.org/bridge2ai/data-sheets-schema/data-governance#') D4DDISTRIBUTION = CurieNamespace('d4ddistribution', 'https://w3id.org/bridge2ai/data-sheets-schema/distribution#') @@ -78,7 +79,6 @@ 
D4DMOTIVATION = CurieNamespace('d4dmotivation', 'https://w3id.org/bridge2ai/data-sheets-schema/motivation#') D4DPREPROCESSING = CurieNamespace('d4dpreprocessing', 'https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling#') D4DUSES = CurieNamespace('d4duses', 'https://w3id.org/bridge2ai/data-sheets-schema/uses#') -D4DVARIABLES = CurieNamespace('d4dvariables', 'https://w3id.org/bridge2ai/data-sheets-schema/variables#') DATA_SHEETS_SCHEMA = CurieNamespace('data_sheets_schema', 'https://w3id.org/bridge2ai/data-sheets-schema/') DATASETS = CurieNamespace('datasets', 'https://w3id.org/linkml/report') DCAT = CurieNamespace('dcat', 'http://www.w3.org/ns/dcat#') @@ -138,6 +138,14 @@ class GrantId(NamedThingId): pass +class FileId(InformationId): + pass + + +class FileCollectionId(InformationId): + pass + + @dataclass(repr=False) class NamedThing(YAMLRoot): """ @@ -199,8 +207,8 @@ class DatasetProperty(YAMLRoot): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["DatasetProperty"] - class_class_curie: ClassVar[str] = "data_sheets_schema:DatasetProperty" + class_class_uri: ClassVar[URIRef] = D4D["DatasetProperty"] + class_class_curie: ClassVar[str] = "d4d:DatasetProperty" class_name: ClassVar[str] = "DatasetProperty" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.DatasetProperty @@ -305,8 +313,8 @@ class Information(NamedThing): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["Information"] - class_class_curie: ClassVar[str] = "data_sheets_schema:Information" + class_class_uri: ClassVar[URIRef] = D4D["Information"] + class_class_curie: ClassVar[str] = "d4d:Information" class_name: ClassVar[str] = "Information" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.Information @@ -409,8 +417,8 @@ class DatasetCollection(Information): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = 
DATA_SHEETS_SCHEMA["DatasetCollection"] - class_class_curie: ClassVar[str] = "data_sheets_schema:DatasetCollection" + class_class_uri: ClassVar[URIRef] = D4D["DatasetCollection"] + class_class_curie: ClassVar[str] = "d4d:DatasetCollection" class_name: ClassVar[str] = "DatasetCollection" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.DatasetCollection @@ -442,17 +450,11 @@ class Dataset(Information): class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.Dataset id: Union[str, DatasetId] = None - bytes: Optional[int] = None - dialect: Optional[str] = None - encoding: Optional[Union[str, "EncodingEnum"]] = None - format: Optional[Union[str, "FormatEnum"]] = None - hash: Optional[str] = None - md5: Optional[str] = None - media_type: Optional[Union[str, "MediaTypeEnum"]] = None - path: Optional[str] = None - sha256: Optional[str] = None external_resources: Optional[Union[Union[dict, "ExternalResource"], list[Union[dict, "ExternalResource"]]]] = empty_list() resources: Optional[Union[dict[Union[str, DatasetId], Union[dict, "Dataset"]], list[Union[dict, "Dataset"]]]] = empty_dict() + file_collections: Optional[Union[dict[Union[str, FileCollectionId], Union[dict, "FileCollection"]], list[Union[dict, "FileCollection"]]]] = empty_dict() + total_file_count: Optional[int] = None + total_size_bytes: Optional[int] = None purposes: Optional[Union[Union[dict, "Purpose"], list[Union[dict, "Purpose"]]]] = empty_list() tasks: Optional[Union[Union[dict, "Task"], list[Union[dict, "Task"]]]] = empty_list() addressing_gaps: Optional[Union[Union[dict, "AddressingGap"], list[Union[dict, "AddressingGap"]]]] = empty_list() @@ -478,7 +480,9 @@ class Dataset(Information): data_protection_impacts: Optional[Union[Union[dict, "DataProtectionImpact"], list[Union[dict, "DataProtectionImpact"]]]] = empty_list() human_subject_research: Optional[Union[dict, "HumanSubjectResearch"]] = None informed_consent: Optional[Union[Union[dict, "InformedConsent"], list[Union[dict, "InformedConsent"]]]] = 
empty_list() - vulnerable_populations: Optional[Union[dict, "VulnerablePopulations"]] = None + at_risk_populations: Optional[Union[dict, "AtRiskPopulations"]] = None + participant_privacy: Optional[Union[Union[dict, "ParticipantPrivacy"], list[Union[dict, "ParticipantPrivacy"]]]] = empty_list() + participant_compensation: Optional[Union[Union[dict, "HumanSubjectCompensation"], list[Union[dict, "HumanSubjectCompensation"]]]] = empty_list() preprocessing_strategies: Optional[Union[Union[dict, "PreprocessingStrategy"], list[Union[dict, "PreprocessingStrategy"]]]] = empty_list() cleaning_strategies: Optional[Union[Union[dict, "CleaningStrategy"], list[Union[dict, "CleaningStrategy"]]]] = empty_list() labeling_strategies: Optional[Union[Union[dict, "LabelingStrategy"], list[Union[dict, "LabelingStrategy"]]]] = empty_list() @@ -517,39 +521,20 @@ def __post_init__(self, *_: str, **kwargs: Any): if not isinstance(self.id, DatasetId): self.id = DatasetId(self.id) - if self.bytes is not None and not isinstance(self.bytes, int): - self.bytes = int(self.bytes) - - if self.dialect is not None and not isinstance(self.dialect, str): - self.dialect = str(self.dialect) - - if self.encoding is not None and not isinstance(self.encoding, EncodingEnum): - self.encoding = EncodingEnum(self.encoding) - - if self.format is not None and not isinstance(self.format, FormatEnum): - self.format = FormatEnum(self.format) - - if self.hash is not None and not isinstance(self.hash, str): - self.hash = str(self.hash) - - if self.md5 is not None and not isinstance(self.md5, str): - self.md5 = str(self.md5) - - if self.media_type is not None and not isinstance(self.media_type, MediaTypeEnum): - self.media_type = MediaTypeEnum(self.media_type) - - if self.path is not None and not isinstance(self.path, str): - self.path = str(self.path) - - if self.sha256 is not None and not isinstance(self.sha256, str): - self.sha256 = str(self.sha256) - if not isinstance(self.external_resources, list): 
self.external_resources = [self.external_resources] if self.external_resources is not None else [] self.external_resources = [v if isinstance(v, ExternalResource) else ExternalResource(**as_dict(v)) for v in self.external_resources] self._normalize_inlined_as_list(slot_name="resources", slot_type=Dataset, key_name="id", keyed=True) + self._normalize_inlined_as_list(slot_name="file_collections", slot_type=FileCollection, key_name="id", keyed=True) + + if self.total_file_count is not None and not isinstance(self.total_file_count, int): + self.total_file_count = int(self.total_file_count) + + if self.total_size_bytes is not None and not isinstance(self.total_size_bytes, int): + self.total_size_bytes = int(self.total_size_bytes) + if not isinstance(self.purposes, list): self.purposes = [self.purposes] if self.purposes is not None else [] self.purposes = [v if isinstance(v, Purpose) else Purpose(**as_dict(v)) for v in self.purposes] @@ -647,8 +632,16 @@ def __post_init__(self, *_: str, **kwargs: Any): self.informed_consent = [self.informed_consent] if self.informed_consent is not None else [] self.informed_consent = [v if isinstance(v, InformedConsent) else InformedConsent(**as_dict(v)) for v in self.informed_consent] - if self.vulnerable_populations is not None and not isinstance(self.vulnerable_populations, VulnerablePopulations): - self.vulnerable_populations = VulnerablePopulations(**as_dict(self.vulnerable_populations)) + if self.at_risk_populations is not None and not isinstance(self.at_risk_populations, AtRiskPopulations): + self.at_risk_populations = AtRiskPopulations(**as_dict(self.at_risk_populations)) + + if not isinstance(self.participant_privacy, list): + self.participant_privacy = [self.participant_privacy] if self.participant_privacy is not None else [] + self.participant_privacy = [v if isinstance(v, ParticipantPrivacy) else ParticipantPrivacy(**as_dict(v)) for v in self.participant_privacy] + + if not isinstance(self.participant_compensation, list): + 
self.participant_compensation = [self.participant_compensation] if self.participant_compensation is not None else [] + self.participant_compensation = [v if isinstance(v, HumanSubjectCompensation) else HumanSubjectCompensation(**as_dict(v)) for v in self.participant_compensation] if not isinstance(self.preprocessing_strategies, list): self.preprocessing_strategies = [self.preprocessing_strategies] if self.preprocessing_strategies is not None else [] @@ -772,8 +765,8 @@ class DataSubset(Dataset): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["DataSubset"] - class_class_curie: ClassVar[str] = "data_sheets_schema:DataSubset" + class_class_uri: ClassVar[URIRef] = D4D["DataSubset"] + class_class_curie: ClassVar[str] = "d4d:DataSubset" class_name: ClassVar[str] = "DataSubset" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.DataSubset @@ -803,8 +796,8 @@ class FormatDialect(YAMLRoot): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["FormatDialect"] - class_class_curie: ClassVar[str] = "data_sheets_schema:FormatDialect" + class_class_uri: ClassVar[URIRef] = D4D["FormatDialect"] + class_class_curie: ClassVar[str] = "d4d:FormatDialect" class_name: ClassVar[str] = "FormatDialect" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.FormatDialect @@ -1499,8 +1492,8 @@ class InstanceAcquisition(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/InstanceAcquisition"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/InstanceAcquisition" + class_class_uri: ClassVar[URIRef] = D4D["collection/InstanceAcquisition"] + class_class_curie: ClassVar[str] = "d4d:collection/InstanceAcquisition" class_name: ClassVar[str] = "InstanceAcquisition" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.InstanceAcquisition @@ -1538,8 +1531,8 @@ class 
CollectionMechanism(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/CollectionMechanism"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/CollectionMechanism" + class_class_uri: ClassVar[URIRef] = D4D["collection/CollectionMechanism"] + class_class_curie: ClassVar[str] = "d4d:collection/CollectionMechanism" class_name: ClassVar[str] = "CollectionMechanism" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.CollectionMechanism @@ -1560,8 +1553,8 @@ class DataCollector(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/DataCollector"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/DataCollector" + class_class_uri: ClassVar[URIRef] = D4D["collection/DataCollector"] + class_class_curie: ClassVar[str] = "d4d:collection/DataCollector" class_name: ClassVar[str] = "DataCollector" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.DataCollector @@ -1587,8 +1580,8 @@ class CollectionTimeframe(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/CollectionTimeframe"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/CollectionTimeframe" + class_class_uri: ClassVar[URIRef] = D4D["collection/CollectionTimeframe"] + class_class_curie: ClassVar[str] = "d4d:collection/CollectionTimeframe" class_name: ClassVar[str] = "CollectionTimeframe" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.CollectionTimeframe @@ -1618,8 +1611,8 @@ class DirectCollection(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/DirectCollection"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/DirectCollection" + class_class_uri: ClassVar[URIRef] = D4D["collection/DirectCollection"] + 
class_class_curie: ClassVar[str] = "d4d:collection/DirectCollection" class_name: ClassVar[str] = "DirectCollection" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.DirectCollection @@ -1645,8 +1638,8 @@ class MissingDataDocumentation(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/MissingDataDocumentation"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/MissingDataDocumentation" + class_class_uri: ClassVar[URIRef] = D4D["collection/MissingDataDocumentation"] + class_class_curie: ClassVar[str] = "d4d:collection/MissingDataDocumentation" class_name: ClassVar[str] = "MissingDataDocumentation" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.MissingDataDocumentation @@ -1677,8 +1670,8 @@ class RawDataSource(DatasetProperty): """ _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA["collection/RawDataSource"] - class_class_curie: ClassVar[str] = "data_sheets_schema:collection/RawDataSource" + class_class_uri: ClassVar[URIRef] = D4D["collection/RawDataSource"] + class_class_curie: ClassVar[str] = "d4d:collection/RawDataSource" class_name: ClassVar[str] = "RawDataSource" class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.RawDataSource @@ -2545,25 +2538,98 @@ def __post_init__(self, *_: str, **kwargs: Any): @dataclass(repr=False) -class VulnerablePopulations(DatasetProperty): +class ParticipantPrivacy(DatasetProperty): + """ + Information about privacy protections and anonymization procedures for human research participants. 
+ """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = D4DHUMAN["ParticipantPrivacy"] + class_class_curie: ClassVar[str] = "d4dhuman:ParticipantPrivacy" + class_name: ClassVar[str] = "ParticipantPrivacy" + class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.ParticipantPrivacy + + anonymization_method: Optional[Union[str, list[str]]] = empty_list() + reidentification_risk: Optional[Union[str, list[str]]] = empty_list() + privacy_techniques: Optional[Union[str, list[str]]] = empty_list() + data_linkage: Optional[Union[str, list[str]]] = empty_list() + + def __post_init__(self, *_: str, **kwargs: Any): + if not isinstance(self.anonymization_method, list): + self.anonymization_method = [self.anonymization_method] if self.anonymization_method is not None else [] + self.anonymization_method = [v if isinstance(v, str) else str(v) for v in self.anonymization_method] + + if not isinstance(self.reidentification_risk, list): + self.reidentification_risk = [self.reidentification_risk] if self.reidentification_risk is not None else [] + self.reidentification_risk = [v if isinstance(v, str) else str(v) for v in self.reidentification_risk] + + if not isinstance(self.privacy_techniques, list): + self.privacy_techniques = [self.privacy_techniques] if self.privacy_techniques is not None else [] + self.privacy_techniques = [v if isinstance(v, str) else str(v) for v in self.privacy_techniques] + + if not isinstance(self.data_linkage, list): + self.data_linkage = [self.data_linkage] if self.data_linkage is not None else [] + self.data_linkage = [v if isinstance(v, str) else str(v) for v in self.data_linkage] + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class HumanSubjectCompensation(DatasetProperty): + """ + Information about compensation or incentives provided to human research participants. 
+ """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = D4DHUMAN["HumanSubjectCompensation"] + class_class_curie: ClassVar[str] = "d4dhuman:HumanSubjectCompensation" + class_name: ClassVar[str] = "HumanSubjectCompensation" + class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.HumanSubjectCompensation + + compensation_provided: Optional[Union[bool, Bool]] = None + compensation_type: Optional[Union[str, list[str]]] = empty_list() + compensation_amount: Optional[Union[str, list[str]]] = empty_list() + compensation_rationale: Optional[Union[str, list[str]]] = empty_list() + + def __post_init__(self, *_: str, **kwargs: Any): + if self.compensation_provided is not None and not isinstance(self.compensation_provided, Bool): + self.compensation_provided = Bool(self.compensation_provided) + + if not isinstance(self.compensation_type, list): + self.compensation_type = [self.compensation_type] if self.compensation_type is not None else [] + self.compensation_type = [v if isinstance(v, str) else str(v) for v in self.compensation_type] + + if not isinstance(self.compensation_amount, list): + self.compensation_amount = [self.compensation_amount] if self.compensation_amount is not None else [] + self.compensation_amount = [v if isinstance(v, str) else str(v) for v in self.compensation_amount] + + if not isinstance(self.compensation_rationale, list): + self.compensation_rationale = [self.compensation_rationale] if self.compensation_rationale is not None else [] + self.compensation_rationale = [v if isinstance(v, str) else str(v) for v in self.compensation_rationale] + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class AtRiskPopulations(DatasetProperty): """ Information about protections for at-risk populations in human subjects research. 
""" _inherited_slots: ClassVar[list[str]] = [] - class_class_uri: ClassVar[URIRef] = D4DHUMAN["VulnerablePopulations"] - class_class_curie: ClassVar[str] = "d4dhuman:VulnerablePopulations" - class_name: ClassVar[str] = "VulnerablePopulations" - class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.VulnerablePopulations + class_class_uri: ClassVar[URIRef] = D4DHUMAN["AtRiskPopulations"] + class_class_curie: ClassVar[str] = "d4dhuman:AtRiskPopulations" + class_name: ClassVar[str] = "AtRiskPopulations" + class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.AtRiskPopulations - vulnerable_groups_included: Optional[Union[bool, Bool]] = None + at_risk_groups_included: Optional[Union[bool, Bool]] = None special_protections: Optional[Union[str, list[str]]] = empty_list() assent_procedures: Optional[Union[str, list[str]]] = empty_list() guardian_consent: Optional[Union[str, list[str]]] = empty_list() def __post_init__(self, *_: str, **kwargs: Any): - if self.vulnerable_groups_included is not None and not isinstance(self.vulnerable_groups_included, Bool): - self.vulnerable_groups_included = Bool(self.vulnerable_groups_included) + if self.at_risk_groups_included is not None and not isinstance(self.at_risk_groups_included, Bool): + self.at_risk_groups_included = Bool(self.at_risk_groups_included) if not isinstance(self.special_protections, list): self.special_protections = [self.special_protections] if self.special_protections is not None else [] @@ -2758,6 +2824,128 @@ def __post_init__(self, *_: str, **kwargs: Any): super().__post_init__(**kwargs) +@dataclass(repr=False) +class File(Information): + """ + A single file within a dataset or file collection. Represents an individual data file, code file, documentation + file, etc. Maps to RO-Crate File entities. 
+ """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = SCHEMA["MediaObject"] + class_class_curie: ClassVar[str] = "schema:MediaObject" + class_name: ClassVar[str] = "File" + class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.File + + id: Union[str, FileId] = None + bytes: Optional[int] = None + path: Optional[str] = None + format: Optional[Union[str, "FormatEnum"]] = None + encoding: Optional[Union[str, "EncodingEnum"]] = None + compression: Optional[Union[str, "CompressionEnum"]] = None + media_type: Optional[Union[str, "MediaTypeEnum"]] = None + hash: Optional[str] = None + md5: Optional[str] = None + sha256: Optional[str] = None + dialect: Optional[str] = None + file_type: Optional[Union[str, "FileTypeEnum"]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, FileId): + self.id = FileId(self.id) + + if self.bytes is not None and not isinstance(self.bytes, int): + self.bytes = int(self.bytes) + + if self.path is not None and not isinstance(self.path, str): + self.path = str(self.path) + + if self.format is not None and not isinstance(self.format, FormatEnum): + self.format = FormatEnum(self.format) + + if self.encoding is not None and not isinstance(self.encoding, EncodingEnum): + self.encoding = EncodingEnum(self.encoding) + + if self.compression is not None and not isinstance(self.compression, CompressionEnum): + self.compression = CompressionEnum(self.compression) + + if self.media_type is not None and not isinstance(self.media_type, MediaTypeEnum): + self.media_type = MediaTypeEnum(self.media_type) + + if self.hash is not None and not isinstance(self.hash, str): + self.hash = str(self.hash) + + if self.md5 is not None and not isinstance(self.md5, str): + self.md5 = str(self.md5) + + if self.sha256 is not None and not isinstance(self.sha256, str): + self.sha256 = str(self.sha256) + + if self.dialect is not None and not 
isinstance(self.dialect, str): + self.dialect = str(self.dialect) + + if self.file_type is not None and not isinstance(self.file_type, FileTypeEnum): + self.file_type = FileTypeEnum(self.file_type) + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class FileCollection(Information): + """ + A collection of files with shared characteristics (format, purpose, structure). Represents a logical grouping of + related files within a dataset, such as all training data files, all image files, or all raw data files. Maps to + RO-Crate Dataset entities via schema:hasPart relationships. + """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = DCAT["Dataset"] + class_class_curie: ClassVar[str] = "dcat:Dataset" + class_name: ClassVar[str] = "FileCollection" + class_model_uri: ClassVar[URIRef] = DATA_SHEETS_SCHEMA.FileCollection + + id: Union[str, FileCollectionId] = None + path: Optional[str] = None + compression: Optional[Union[str, "CompressionEnum"]] = None + external_resources: Optional[Union[Union[dict, ExternalResource], list[Union[dict, ExternalResource]]]] = empty_list() + resources: Optional[Union[dict[Union[str, DatasetId], Union[dict, Dataset]], list[Union[dict, Dataset]]]] = empty_dict() + collection_type: Optional[Union[Union[str, "FileCollectionTypeEnum"], list[Union[str, "FileCollectionTypeEnum"]]]] = empty_list() + file_count: Optional[int] = None + total_bytes: Optional[int] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, FileCollectionId): + self.id = FileCollectionId(self.id) + + if self.path is not None and not isinstance(self.path, str): + self.path = str(self.path) + + if self.compression is not None and not isinstance(self.compression, CompressionEnum): + self.compression = CompressionEnum(self.compression) + + if not isinstance(self.external_resources, list): + self.external_resources = 
[self.external_resources] if self.external_resources is not None else [] + self.external_resources = [v if isinstance(v, ExternalResource) else ExternalResource(**as_dict(v)) for v in self.external_resources] + + self._normalize_inlined_as_list(slot_name="resources", slot_type=Dataset, key_name="id", keyed=True) + + if not isinstance(self.collection_type, list): + self.collection_type = [self.collection_type] if self.collection_type is not None else [] + self.collection_type = [v if isinstance(v, FileCollectionTypeEnum) else FileCollectionTypeEnum(v) for v in self.collection_type] + + if self.file_count is not None and not isinstance(self.file_count, int): + self.file_count = int(self.file_count) + + if self.total_bytes is not None and not isinstance(self.total_bytes, int): + self.total_bytes = int(self.total_bytes) + + super().__post_init__(**kwargs) + + # Enumerations class FormatEnum(EnumDefinitionImpl): @@ -3400,6 +3588,102 @@ class VariableTypeEnum(EnumDefinitionImpl): description="""Common data types for variables. Values are mapped to schema.org DataType vocabulary. See https://schema.org/DataType""", ) +class FileTypeEnum(EnumDefinitionImpl): + """ + Types of individual files within datasets. 
+ """ + data_file = PermissibleValue( + text="data_file", + description="A data file containing dataset content", + meaning=SCHEMA["DataDownload"]) + code_file = PermissibleValue( + text="code_file", + description="A source code or script file", + meaning=SCHEMA["SoftwareSourceCode"]) + documentation_file = PermissibleValue( + text="documentation_file", + description="A documentation file (README, guide, etc.)", + meaning=SCHEMA["Documentation"]) + metadata_file = PermissibleValue( + text="metadata_file", + description="A metadata or annotation file", + meaning=DCAT["CatalogRecord"]) + configuration_file = PermissibleValue( + text="configuration_file", + description="A configuration or settings file", + meaning=D4D["ConfigurationFile"]) + notebook_file = PermissibleValue( + text="notebook_file", + description="A computational notebook file (Jupyter, R Markdown, etc.)", + meaning=D4D["NotebookFile"]) + image_file = PermissibleValue( + text="image_file", + description="An image or visualization file", + meaning=SCHEMA["ImageObject"]) + archive_file = PermissibleValue( + text="archive_file", + description="An archive or compressed file", + meaning=D4D["ArchiveFile"]) + other = PermissibleValue( + text="other", + description="Other file type", + meaning=D4D["OtherFile"]) + + _defn = EnumDefinition( + name="FileTypeEnum", + description="Types of individual files within datasets.", + ) + +class FileCollectionTypeEnum(EnumDefinitionImpl): + """ + Types of file collections within datasets. 
+ """ + raw_data = PermissibleValue( + text="raw_data", + description="Raw, unprocessed data files", + meaning=D4D["RawData"]) + processed_data = PermissibleValue( + text="processed_data", + description="Cleaned, processed, or transformed data files", + meaning=D4D["ProcessedData"]) + training_split = PermissibleValue( + text="training_split", + description="Files designated for model training", + meaning=D4D["TrainingSplit"]) + test_split = PermissibleValue( + text="test_split", + description="Files designated for model testing", + meaning=D4D["TestSplit"]) + validation_split = PermissibleValue( + text="validation_split", + description="Files designated for model validation", + meaning=D4D["ValidationSplit"]) + documentation = PermissibleValue( + text="documentation", + description="Documentation files (README, codebook, etc.)", + meaning=SCHEMA["Documentation"]) + metadata = PermissibleValue( + text="metadata", + description="Metadata or annotation files", + meaning=DCAT["CatalogRecord"]) + code = PermissibleValue( + text="code", + description="Code or script files", + meaning=SCHEMA["SoftwareSourceCode"]) + supplementary = PermissibleValue( + text="supplementary", + description="Supplementary materials", + meaning=SCHEMA["SupplementalMaterial"]) + other = PermissibleValue( + text="other", + description="Other file collection type", + meaning=D4D["OtherFileCollection"]) + + _defn = EnumDefinition( + name="FileCollectionTypeEnum", + description="Types of file collections within datasets.", + ) + # Slots class slots: pass @@ -3425,7 +3709,7 @@ class slots: slots.page = Slot(uri=DCAT.landingPage, name="page", curie=DCAT.curie('landingPage'), model_uri=DATA_SHEETS_SCHEMA.page, domain=None, range=Optional[str]) -slots.dialect = Slot(uri=DATA_SHEETS_SCHEMA.dialect, name="dialect", curie=DATA_SHEETS_SCHEMA.curie('dialect'), +slots.dialect = Slot(uri=SCHEMA.encodingFormat, name="dialect", curie=SCHEMA.curie('encodingFormat'), model_uri=DATA_SHEETS_SCHEMA.dialect, 
domain=None, range=Optional[str]) slots.bytes = Slot(uri=DCAT.byteSize, name="bytes", curie=DCAT.curie('byteSize'), @@ -3501,169 +3785,184 @@ class slots: slots.external_resources = Slot(uri=DCTERMS.references, name="external_resources", curie=DCTERMS.curie('references'), model_uri=DATA_SHEETS_SCHEMA.external_resources, domain=None, range=Optional[Union[str, list[str]]]) -slots.resources = Slot(uri=DATA_SHEETS_SCHEMA.resources, name="resources", curie=DATA_SHEETS_SCHEMA.curie('resources'), +slots.resources = Slot(uri=SCHEMA.hasPart, name="resources", curie=SCHEMA.curie('hasPart'), model_uri=DATA_SHEETS_SCHEMA.resources, domain=None, range=Optional[Union[Union[str, DatasetId], list[Union[str, DatasetId]]]]) -slots.dataset__purposes = Slot(uri=DATA_SHEETS_SCHEMA.purposes, name="dataset__purposes", curie=DATA_SHEETS_SCHEMA.curie('purposes'), +slots.dataset__file_collections = Slot(uri=SCHEMA.hasPart, name="dataset__file_collections", curie=SCHEMA.curie('hasPart'), + model_uri=DATA_SHEETS_SCHEMA.dataset__file_collections, domain=None, range=Optional[Union[dict[Union[str, FileCollectionId], Union[dict, FileCollection]], list[Union[dict, FileCollection]]]]) + +slots.dataset__total_file_count = Slot(uri=D4D.totalFileCount, name="dataset__total_file_count", curie=D4D.curie('totalFileCount'), + model_uri=DATA_SHEETS_SCHEMA.dataset__total_file_count, domain=None, range=Optional[int]) + +slots.dataset__total_size_bytes = Slot(uri=DCAT.byteSize, name="dataset__total_size_bytes", curie=DCAT.curie('byteSize'), + model_uri=DATA_SHEETS_SCHEMA.dataset__total_size_bytes, domain=None, range=Optional[int]) + +slots.dataset__purposes = Slot(uri=D4D.purposes, name="dataset__purposes", curie=D4D.curie('purposes'), model_uri=DATA_SHEETS_SCHEMA.dataset__purposes, domain=None, range=Optional[Union[Union[dict, Purpose], list[Union[dict, Purpose]]]]) -slots.dataset__tasks = Slot(uri=DATA_SHEETS_SCHEMA.tasks, name="dataset__tasks", curie=DATA_SHEETS_SCHEMA.curie('tasks'), +slots.dataset__tasks 
= Slot(uri=D4D.tasks, name="dataset__tasks", curie=D4D.curie('tasks'), model_uri=DATA_SHEETS_SCHEMA.dataset__tasks, domain=None, range=Optional[Union[Union[dict, Task], list[Union[dict, Task]]]]) -slots.dataset__addressing_gaps = Slot(uri=DATA_SHEETS_SCHEMA.addressing_gaps, name="dataset__addressing_gaps", curie=DATA_SHEETS_SCHEMA.curie('addressing_gaps'), +slots.dataset__addressing_gaps = Slot(uri=D4D.addressingGaps, name="dataset__addressing_gaps", curie=D4D.curie('addressingGaps'), model_uri=DATA_SHEETS_SCHEMA.dataset__addressing_gaps, domain=None, range=Optional[Union[Union[dict, AddressingGap], list[Union[dict, AddressingGap]]]]) -slots.dataset__creators = Slot(uri=DATA_SHEETS_SCHEMA.creators, name="dataset__creators", curie=DATA_SHEETS_SCHEMA.curie('creators'), +slots.dataset__creators = Slot(uri=SCHEMA.creator, name="dataset__creators", curie=SCHEMA.curie('creator'), model_uri=DATA_SHEETS_SCHEMA.dataset__creators, domain=None, range=Optional[Union[Union[dict, Creator], list[Union[dict, Creator]]]]) -slots.dataset__funders = Slot(uri=DATA_SHEETS_SCHEMA.funders, name="dataset__funders", curie=DATA_SHEETS_SCHEMA.curie('funders'), +slots.dataset__funders = Slot(uri=SCHEMA.funder, name="dataset__funders", curie=SCHEMA.curie('funder'), model_uri=DATA_SHEETS_SCHEMA.dataset__funders, domain=None, range=Optional[Union[Union[dict, FundingMechanism], list[Union[dict, FundingMechanism]]]]) slots.dataset__subsets = Slot(uri=DCAT.distribution, name="dataset__subsets", curie=DCAT.curie('distribution'), model_uri=DATA_SHEETS_SCHEMA.dataset__subsets, domain=None, range=Optional[Union[dict[Union[str, DataSubsetId], Union[dict, DataSubset]], list[Union[dict, DataSubset]]]]) -slots.dataset__instances = Slot(uri=DATA_SHEETS_SCHEMA.instances, name="dataset__instances", curie=DATA_SHEETS_SCHEMA.curie('instances'), +slots.dataset__instances = Slot(uri=D4D.instances, name="dataset__instances", curie=D4D.curie('instances'), model_uri=DATA_SHEETS_SCHEMA.dataset__instances, 
domain=None, range=Optional[Union[Union[dict, Instance], list[Union[dict, Instance]]]]) -slots.dataset__anomalies = Slot(uri=DATA_SHEETS_SCHEMA.anomalies, name="dataset__anomalies", curie=DATA_SHEETS_SCHEMA.curie('anomalies'), +slots.dataset__anomalies = Slot(uri=D4D.anomalies, name="dataset__anomalies", curie=D4D.curie('anomalies'), model_uri=DATA_SHEETS_SCHEMA.dataset__anomalies, domain=None, range=Optional[Union[Union[dict, DataAnomaly], list[Union[dict, DataAnomaly]]]]) -slots.dataset__known_biases = Slot(uri=DATA_SHEETS_SCHEMA.known_biases, name="dataset__known_biases", curie=DATA_SHEETS_SCHEMA.curie('known_biases'), +slots.dataset__known_biases = Slot(uri=D4D.known_biases, name="dataset__known_biases", curie=D4D.curie('known_biases'), model_uri=DATA_SHEETS_SCHEMA.dataset__known_biases, domain=None, range=Optional[Union[Union[dict, DatasetBias], list[Union[dict, DatasetBias]]]]) -slots.dataset__known_limitations = Slot(uri=DATA_SHEETS_SCHEMA.known_limitations, name="dataset__known_limitations", curie=DATA_SHEETS_SCHEMA.curie('known_limitations'), +slots.dataset__known_limitations = Slot(uri=D4D.known_limitations, name="dataset__known_limitations", curie=D4D.curie('known_limitations'), model_uri=DATA_SHEETS_SCHEMA.dataset__known_limitations, domain=None, range=Optional[Union[Union[dict, DatasetLimitation], list[Union[dict, DatasetLimitation]]]]) -slots.dataset__confidential_elements = Slot(uri=DATA_SHEETS_SCHEMA.confidential_elements, name="dataset__confidential_elements", curie=DATA_SHEETS_SCHEMA.curie('confidential_elements'), +slots.dataset__confidential_elements = Slot(uri=D4D.confidentialElements, name="dataset__confidential_elements", curie=D4D.curie('confidentialElements'), model_uri=DATA_SHEETS_SCHEMA.dataset__confidential_elements, domain=None, range=Optional[Union[Union[dict, Confidentiality], list[Union[dict, Confidentiality]]]]) -slots.dataset__content_warnings = Slot(uri=DATA_SHEETS_SCHEMA.content_warnings, name="dataset__content_warnings", 
curie=DATA_SHEETS_SCHEMA.curie('content_warnings'), +slots.dataset__content_warnings = Slot(uri=D4D.contentWarnings, name="dataset__content_warnings", curie=D4D.curie('contentWarnings'), model_uri=DATA_SHEETS_SCHEMA.dataset__content_warnings, domain=None, range=Optional[Union[Union[dict, ContentWarning], list[Union[dict, ContentWarning]]]]) -slots.dataset__subpopulations = Slot(uri=DATA_SHEETS_SCHEMA.subpopulations, name="dataset__subpopulations", curie=DATA_SHEETS_SCHEMA.curie('subpopulations'), +slots.dataset__subpopulations = Slot(uri=D4D.subpopulations, name="dataset__subpopulations", curie=D4D.curie('subpopulations'), model_uri=DATA_SHEETS_SCHEMA.dataset__subpopulations, domain=None, range=Optional[Union[Union[dict, Subpopulation], list[Union[dict, Subpopulation]]]]) -slots.dataset__sensitive_elements = Slot(uri=DATA_SHEETS_SCHEMA.sensitive_elements, name="dataset__sensitive_elements", curie=DATA_SHEETS_SCHEMA.curie('sensitive_elements'), +slots.dataset__sensitive_elements = Slot(uri=D4D.sensitiveElements, name="dataset__sensitive_elements", curie=D4D.curie('sensitiveElements'), model_uri=DATA_SHEETS_SCHEMA.dataset__sensitive_elements, domain=None, range=Optional[Union[Union[dict, SensitiveElement], list[Union[dict, SensitiveElement]]]]) -slots.dataset__acquisition_methods = Slot(uri=DATA_SHEETS_SCHEMA.acquisition_methods, name="dataset__acquisition_methods", curie=DATA_SHEETS_SCHEMA.curie('acquisition_methods'), +slots.dataset__acquisition_methods = Slot(uri=D4D.acquisitionMethods, name="dataset__acquisition_methods", curie=D4D.curie('acquisitionMethods'), model_uri=DATA_SHEETS_SCHEMA.dataset__acquisition_methods, domain=None, range=Optional[Union[Union[dict, InstanceAcquisition], list[Union[dict, InstanceAcquisition]]]]) -slots.dataset__collection_mechanisms = Slot(uri=DATA_SHEETS_SCHEMA.collection_mechanisms, name="dataset__collection_mechanisms", curie=DATA_SHEETS_SCHEMA.curie('collection_mechanisms'), +slots.dataset__collection_mechanisms = 
Slot(uri=D4D.collectionMechanisms, name="dataset__collection_mechanisms", curie=D4D.curie('collectionMechanisms'), model_uri=DATA_SHEETS_SCHEMA.dataset__collection_mechanisms, domain=None, range=Optional[Union[Union[dict, CollectionMechanism], list[Union[dict, CollectionMechanism]]]]) -slots.dataset__sampling_strategies = Slot(uri=DATA_SHEETS_SCHEMA.sampling_strategies, name="dataset__sampling_strategies", curie=DATA_SHEETS_SCHEMA.curie('sampling_strategies'), +slots.dataset__sampling_strategies = Slot(uri=D4D.samplingStrategies, name="dataset__sampling_strategies", curie=D4D.curie('samplingStrategies'), model_uri=DATA_SHEETS_SCHEMA.dataset__sampling_strategies, domain=None, range=Optional[Union[Union[dict, SamplingStrategy], list[Union[dict, SamplingStrategy]]]]) -slots.dataset__data_collectors = Slot(uri=DATA_SHEETS_SCHEMA.data_collectors, name="dataset__data_collectors", curie=DATA_SHEETS_SCHEMA.curie('data_collectors'), +slots.dataset__data_collectors = Slot(uri=D4D.dataCollectors, name="dataset__data_collectors", curie=D4D.curie('dataCollectors'), model_uri=DATA_SHEETS_SCHEMA.dataset__data_collectors, domain=None, range=Optional[Union[Union[dict, DataCollector], list[Union[dict, DataCollector]]]]) -slots.dataset__collection_timeframes = Slot(uri=DATA_SHEETS_SCHEMA.collection_timeframes, name="dataset__collection_timeframes", curie=DATA_SHEETS_SCHEMA.curie('collection_timeframes'), +slots.dataset__collection_timeframes = Slot(uri=D4D.collectionTimeframes, name="dataset__collection_timeframes", curie=D4D.curie('collectionTimeframes'), model_uri=DATA_SHEETS_SCHEMA.dataset__collection_timeframes, domain=None, range=Optional[Union[Union[dict, CollectionTimeframe], list[Union[dict, CollectionTimeframe]]]]) -slots.dataset__missing_data_documentation = Slot(uri=DATA_SHEETS_SCHEMA.missing_data_documentation, name="dataset__missing_data_documentation", curie=DATA_SHEETS_SCHEMA.curie('missing_data_documentation'), +slots.dataset__missing_data_documentation = 
Slot(uri=D4D.missingDataDocumentation, name="dataset__missing_data_documentation", curie=D4D.curie('missingDataDocumentation'), model_uri=DATA_SHEETS_SCHEMA.dataset__missing_data_documentation, domain=None, range=Optional[Union[Union[dict, MissingDataDocumentation], list[Union[dict, MissingDataDocumentation]]]]) -slots.dataset__raw_data_sources = Slot(uri=DATA_SHEETS_SCHEMA.raw_data_sources, name="dataset__raw_data_sources", curie=DATA_SHEETS_SCHEMA.curie('raw_data_sources'), +slots.dataset__raw_data_sources = Slot(uri=D4D.rawDataSources, name="dataset__raw_data_sources", curie=D4D.curie('rawDataSources'), model_uri=DATA_SHEETS_SCHEMA.dataset__raw_data_sources, domain=None, range=Optional[Union[Union[dict, RawDataSource], list[Union[dict, RawDataSource]]]]) -slots.dataset__ethical_reviews = Slot(uri=DATA_SHEETS_SCHEMA.ethical_reviews, name="dataset__ethical_reviews", curie=DATA_SHEETS_SCHEMA.curie('ethical_reviews'), +slots.dataset__ethical_reviews = Slot(uri=D4D.ethicalReviews, name="dataset__ethical_reviews", curie=D4D.curie('ethicalReviews'), model_uri=DATA_SHEETS_SCHEMA.dataset__ethical_reviews, domain=None, range=Optional[Union[Union[dict, EthicalReview], list[Union[dict, EthicalReview]]]]) -slots.dataset__data_protection_impacts = Slot(uri=DATA_SHEETS_SCHEMA.data_protection_impacts, name="dataset__data_protection_impacts", curie=DATA_SHEETS_SCHEMA.curie('data_protection_impacts'), +slots.dataset__data_protection_impacts = Slot(uri=D4D.dataProtectionImpacts, name="dataset__data_protection_impacts", curie=D4D.curie('dataProtectionImpacts'), model_uri=DATA_SHEETS_SCHEMA.dataset__data_protection_impacts, domain=None, range=Optional[Union[Union[dict, DataProtectionImpact], list[Union[dict, DataProtectionImpact]]]]) -slots.dataset__human_subject_research = Slot(uri=DATA_SHEETS_SCHEMA.human_subject_research, name="dataset__human_subject_research", curie=DATA_SHEETS_SCHEMA.curie('human_subject_research'), +slots.dataset__human_subject_research = 
Slot(uri=D4D.humanSubjectResearch, name="dataset__human_subject_research", curie=D4D.curie('humanSubjectResearch'), model_uri=DATA_SHEETS_SCHEMA.dataset__human_subject_research, domain=None, range=Optional[Union[dict, HumanSubjectResearch]]) -slots.dataset__informed_consent = Slot(uri=DATA_SHEETS_SCHEMA.informed_consent, name="dataset__informed_consent", curie=DATA_SHEETS_SCHEMA.curie('informed_consent'), +slots.dataset__informed_consent = Slot(uri=D4D.informedConsent, name="dataset__informed_consent", curie=D4D.curie('informedConsent'), model_uri=DATA_SHEETS_SCHEMA.dataset__informed_consent, domain=None, range=Optional[Union[Union[dict, InformedConsent], list[Union[dict, InformedConsent]]]]) -slots.dataset__vulnerable_populations = Slot(uri=DATA_SHEETS_SCHEMA.vulnerable_populations, name="dataset__vulnerable_populations", curie=DATA_SHEETS_SCHEMA.curie('vulnerable_populations'), - model_uri=DATA_SHEETS_SCHEMA.dataset__vulnerable_populations, domain=None, range=Optional[Union[dict, VulnerablePopulations]]) +slots.dataset__at_risk_populations = Slot(uri=D4D.atRiskPopulations, name="dataset__at_risk_populations", curie=D4D.curie('atRiskPopulations'), + model_uri=DATA_SHEETS_SCHEMA.dataset__at_risk_populations, domain=None, range=Optional[Union[dict, AtRiskPopulations]]) -slots.dataset__preprocessing_strategies = Slot(uri=DATA_SHEETS_SCHEMA.preprocessing_strategies, name="dataset__preprocessing_strategies", curie=DATA_SHEETS_SCHEMA.curie('preprocessing_strategies'), +slots.dataset__participant_privacy = Slot(uri=D4D.participantPrivacy, name="dataset__participant_privacy", curie=D4D.curie('participantPrivacy'), + model_uri=DATA_SHEETS_SCHEMA.dataset__participant_privacy, domain=None, range=Optional[Union[Union[dict, ParticipantPrivacy], list[Union[dict, ParticipantPrivacy]]]]) + +slots.dataset__participant_compensation = Slot(uri=D4D.participantCompensation, name="dataset__participant_compensation", curie=D4D.curie('participantCompensation'), + 
model_uri=DATA_SHEETS_SCHEMA.dataset__participant_compensation, domain=None, range=Optional[Union[Union[dict, HumanSubjectCompensation], list[Union[dict, HumanSubjectCompensation]]]]) + +slots.dataset__preprocessing_strategies = Slot(uri=D4D.preprocessingStrategies, name="dataset__preprocessing_strategies", curie=D4D.curie('preprocessingStrategies'), model_uri=DATA_SHEETS_SCHEMA.dataset__preprocessing_strategies, domain=None, range=Optional[Union[Union[dict, PreprocessingStrategy], list[Union[dict, PreprocessingStrategy]]]]) -slots.dataset__cleaning_strategies = Slot(uri=DATA_SHEETS_SCHEMA.cleaning_strategies, name="dataset__cleaning_strategies", curie=DATA_SHEETS_SCHEMA.curie('cleaning_strategies'), +slots.dataset__cleaning_strategies = Slot(uri=D4D.cleaningStrategies, name="dataset__cleaning_strategies", curie=D4D.curie('cleaningStrategies'), model_uri=DATA_SHEETS_SCHEMA.dataset__cleaning_strategies, domain=None, range=Optional[Union[Union[dict, CleaningStrategy], list[Union[dict, CleaningStrategy]]]]) -slots.dataset__labeling_strategies = Slot(uri=DATA_SHEETS_SCHEMA.labeling_strategies, name="dataset__labeling_strategies", curie=DATA_SHEETS_SCHEMA.curie('labeling_strategies'), +slots.dataset__labeling_strategies = Slot(uri=D4D.labelingStrategies, name="dataset__labeling_strategies", curie=D4D.curie('labelingStrategies'), model_uri=DATA_SHEETS_SCHEMA.dataset__labeling_strategies, domain=None, range=Optional[Union[Union[dict, LabelingStrategy], list[Union[dict, LabelingStrategy]]]]) -slots.dataset__raw_sources = Slot(uri=DATA_SHEETS_SCHEMA.raw_sources, name="dataset__raw_sources", curie=DATA_SHEETS_SCHEMA.curie('raw_sources'), +slots.dataset__raw_sources = Slot(uri=D4D.rawSources, name="dataset__raw_sources", curie=D4D.curie('rawSources'), model_uri=DATA_SHEETS_SCHEMA.dataset__raw_sources, domain=None, range=Optional[Union[Union[dict, RawData], list[Union[dict, RawData]]]]) -slots.dataset__imputation_protocols = Slot(uri=DATA_SHEETS_SCHEMA.imputation_protocols, 
name="dataset__imputation_protocols", curie=DATA_SHEETS_SCHEMA.curie('imputation_protocols'), +slots.dataset__imputation_protocols = Slot(uri=D4D.imputation_protocols, name="dataset__imputation_protocols", curie=D4D.curie('imputation_protocols'), model_uri=DATA_SHEETS_SCHEMA.dataset__imputation_protocols, domain=None, range=Optional[Union[Union[dict, ImputationProtocol], list[Union[dict, ImputationProtocol]]]]) -slots.dataset__annotation_analyses = Slot(uri=DATA_SHEETS_SCHEMA.annotation_analyses, name="dataset__annotation_analyses", curie=DATA_SHEETS_SCHEMA.curie('annotation_analyses'), +slots.dataset__annotation_analyses = Slot(uri=D4D.annotation_analyses, name="dataset__annotation_analyses", curie=D4D.curie('annotation_analyses'), model_uri=DATA_SHEETS_SCHEMA.dataset__annotation_analyses, domain=None, range=Optional[Union[Union[dict, AnnotationAnalysis], list[Union[dict, AnnotationAnalysis]]]]) slots.dataset__machine_annotation_tools = Slot(uri=DATA_SHEETS_SCHEMA.machine_annotation_tools, name="dataset__machine_annotation_tools", curie=DATA_SHEETS_SCHEMA.curie('machine_annotation_tools'), model_uri=DATA_SHEETS_SCHEMA.dataset__machine_annotation_tools, domain=None, range=Optional[Union[Union[dict, MachineAnnotationTools], list[Union[dict, MachineAnnotationTools]]]]) -slots.dataset__existing_uses = Slot(uri=DATA_SHEETS_SCHEMA.existing_uses, name="dataset__existing_uses", curie=DATA_SHEETS_SCHEMA.curie('existing_uses'), +slots.dataset__existing_uses = Slot(uri=D4D.existingUses, name="dataset__existing_uses", curie=D4D.curie('existingUses'), model_uri=DATA_SHEETS_SCHEMA.dataset__existing_uses, domain=None, range=Optional[Union[Union[dict, ExistingUse], list[Union[dict, ExistingUse]]]]) -slots.dataset__use_repository = Slot(uri=DATA_SHEETS_SCHEMA.use_repository, name="dataset__use_repository", curie=DATA_SHEETS_SCHEMA.curie('use_repository'), +slots.dataset__use_repository = Slot(uri=D4D.useRepository, name="dataset__use_repository", curie=D4D.curie('useRepository'), 
model_uri=DATA_SHEETS_SCHEMA.dataset__use_repository, domain=None, range=Optional[Union[Union[dict, UseRepository], list[Union[dict, UseRepository]]]]) -slots.dataset__other_tasks = Slot(uri=DATA_SHEETS_SCHEMA.other_tasks, name="dataset__other_tasks", curie=DATA_SHEETS_SCHEMA.curie('other_tasks'), +slots.dataset__other_tasks = Slot(uri=D4D.otherTasks, name="dataset__other_tasks", curie=D4D.curie('otherTasks'), model_uri=DATA_SHEETS_SCHEMA.dataset__other_tasks, domain=None, range=Optional[Union[Union[dict, OtherTask], list[Union[dict, OtherTask]]]]) -slots.dataset__future_use_impacts = Slot(uri=DATA_SHEETS_SCHEMA.future_use_impacts, name="dataset__future_use_impacts", curie=DATA_SHEETS_SCHEMA.curie('future_use_impacts'), +slots.dataset__future_use_impacts = Slot(uri=D4D.futureUseImpacts, name="dataset__future_use_impacts", curie=D4D.curie('futureUseImpacts'), model_uri=DATA_SHEETS_SCHEMA.dataset__future_use_impacts, domain=None, range=Optional[Union[Union[dict, FutureUseImpact], list[Union[dict, FutureUseImpact]]]]) -slots.dataset__discouraged_uses = Slot(uri=DATA_SHEETS_SCHEMA.discouraged_uses, name="dataset__discouraged_uses", curie=DATA_SHEETS_SCHEMA.curie('discouraged_uses'), +slots.dataset__discouraged_uses = Slot(uri=D4D.discouragedUses, name="dataset__discouraged_uses", curie=D4D.curie('discouragedUses'), model_uri=DATA_SHEETS_SCHEMA.dataset__discouraged_uses, domain=None, range=Optional[Union[Union[dict, DiscouragedUse], list[Union[dict, DiscouragedUse]]]]) -slots.dataset__intended_uses = Slot(uri=DATA_SHEETS_SCHEMA.intended_uses, name="dataset__intended_uses", curie=DATA_SHEETS_SCHEMA.curie('intended_uses'), +slots.dataset__intended_uses = Slot(uri=D4D.intendedUses, name="dataset__intended_uses", curie=D4D.curie('intendedUses'), model_uri=DATA_SHEETS_SCHEMA.dataset__intended_uses, domain=None, range=Optional[Union[Union[dict, IntendedUse], list[Union[dict, IntendedUse]]]]) -slots.dataset__prohibited_uses = Slot(uri=DATA_SHEETS_SCHEMA.prohibited_uses, 
name="dataset__prohibited_uses", curie=DATA_SHEETS_SCHEMA.curie('prohibited_uses'), +slots.dataset__prohibited_uses = Slot(uri=D4D.prohibitedUses, name="dataset__prohibited_uses", curie=D4D.curie('prohibitedUses'), model_uri=DATA_SHEETS_SCHEMA.dataset__prohibited_uses, domain=None, range=Optional[Union[Union[dict, ProhibitedUse], list[Union[dict, ProhibitedUse]]]]) -slots.dataset__distribution_formats = Slot(uri=DATA_SHEETS_SCHEMA.distribution_formats, name="dataset__distribution_formats", curie=DATA_SHEETS_SCHEMA.curie('distribution_formats'), +slots.dataset__distribution_formats = Slot(uri=D4D.distributionFormats, name="dataset__distribution_formats", curie=D4D.curie('distributionFormats'), model_uri=DATA_SHEETS_SCHEMA.dataset__distribution_formats, domain=None, range=Optional[Union[Union[dict, DistributionFormat], list[Union[dict, DistributionFormat]]]]) -slots.dataset__distribution_dates = Slot(uri=DATA_SHEETS_SCHEMA.distribution_dates, name="dataset__distribution_dates", curie=DATA_SHEETS_SCHEMA.curie('distribution_dates'), +slots.dataset__distribution_dates = Slot(uri=D4D.distributionDates, name="dataset__distribution_dates", curie=D4D.curie('distributionDates'), model_uri=DATA_SHEETS_SCHEMA.dataset__distribution_dates, domain=None, range=Optional[Union[Union[dict, DistributionDate], list[Union[dict, DistributionDate]]]]) -slots.dataset__license_and_use_terms = Slot(uri=DATA_SHEETS_SCHEMA.license_and_use_terms, name="dataset__license_and_use_terms", curie=DATA_SHEETS_SCHEMA.curie('license_and_use_terms'), +slots.dataset__license_and_use_terms = Slot(uri=SCHEMA.license, name="dataset__license_and_use_terms", curie=SCHEMA.curie('license'), model_uri=DATA_SHEETS_SCHEMA.dataset__license_and_use_terms, domain=None, range=Optional[Union[dict, LicenseAndUseTerms]]) -slots.dataset__ip_restrictions = Slot(uri=DATA_SHEETS_SCHEMA.ip_restrictions, name="dataset__ip_restrictions", curie=DATA_SHEETS_SCHEMA.curie('ip_restrictions'), +slots.dataset__ip_restrictions = 
Slot(uri=D4D.ipRestrictions, name="dataset__ip_restrictions", curie=D4D.curie('ipRestrictions'), model_uri=DATA_SHEETS_SCHEMA.dataset__ip_restrictions, domain=None, range=Optional[Union[dict, IPRestrictions]]) -slots.dataset__regulatory_restrictions = Slot(uri=DATA_SHEETS_SCHEMA.regulatory_restrictions, name="dataset__regulatory_restrictions", curie=DATA_SHEETS_SCHEMA.curie('regulatory_restrictions'), +slots.dataset__regulatory_restrictions = Slot(uri=D4D.regulatoryRestrictions, name="dataset__regulatory_restrictions", curie=D4D.curie('regulatoryRestrictions'), model_uri=DATA_SHEETS_SCHEMA.dataset__regulatory_restrictions, domain=None, range=Optional[Union[dict, ExportControlRegulatoryRestrictions]]) -slots.dataset__maintainers = Slot(uri=DATA_SHEETS_SCHEMA.maintainers, name="dataset__maintainers", curie=DATA_SHEETS_SCHEMA.curie('maintainers'), +slots.dataset__maintainers = Slot(uri=D4D.maintainers, name="dataset__maintainers", curie=D4D.curie('maintainers'), model_uri=DATA_SHEETS_SCHEMA.dataset__maintainers, domain=None, range=Optional[Union[Union[dict, Maintainer], list[Union[dict, Maintainer]]]]) -slots.dataset__errata = Slot(uri=DATA_SHEETS_SCHEMA.errata, name="dataset__errata", curie=DATA_SHEETS_SCHEMA.curie('errata'), +slots.dataset__errata = Slot(uri=D4D.errata, name="dataset__errata", curie=D4D.curie('errata'), model_uri=DATA_SHEETS_SCHEMA.dataset__errata, domain=None, range=Optional[Union[Union[dict, Erratum], list[Union[dict, Erratum]]]]) -slots.dataset__updates = Slot(uri=DATA_SHEETS_SCHEMA.updates, name="dataset__updates", curie=DATA_SHEETS_SCHEMA.curie('updates'), +slots.dataset__updates = Slot(uri=D4D.updates, name="dataset__updates", curie=D4D.curie('updates'), model_uri=DATA_SHEETS_SCHEMA.dataset__updates, domain=None, range=Optional[Union[dict, UpdatePlan]]) -slots.dataset__retention_limit = Slot(uri=DATA_SHEETS_SCHEMA.retention_limit, name="dataset__retention_limit", curie=DATA_SHEETS_SCHEMA.curie('retention_limit'), 
+slots.dataset__retention_limit = Slot(uri=D4D.retentionLimit, name="dataset__retention_limit", curie=D4D.curie('retentionLimit'), model_uri=DATA_SHEETS_SCHEMA.dataset__retention_limit, domain=None, range=Optional[Union[dict, RetentionLimits]]) -slots.dataset__version_access = Slot(uri=DATA_SHEETS_SCHEMA.version_access, name="dataset__version_access", curie=DATA_SHEETS_SCHEMA.curie('version_access'), +slots.dataset__version_access = Slot(uri=DCAT.accessURL, name="dataset__version_access", curie=DCAT.curie('accessURL'), model_uri=DATA_SHEETS_SCHEMA.dataset__version_access, domain=None, range=Optional[Union[dict, VersionAccess]]) -slots.dataset__extension_mechanism = Slot(uri=DATA_SHEETS_SCHEMA.extension_mechanism, name="dataset__extension_mechanism", curie=DATA_SHEETS_SCHEMA.curie('extension_mechanism'), +slots.dataset__extension_mechanism = Slot(uri=D4D.extensionMechanism, name="dataset__extension_mechanism", curie=D4D.curie('extensionMechanism'), model_uri=DATA_SHEETS_SCHEMA.dataset__extension_mechanism, domain=None, range=Optional[Union[dict, ExtensionMechanism]]) slots.dataset__variables = Slot(uri=SCHEMA.variableMeasured, name="dataset__variables", curie=SCHEMA.curie('variableMeasured'), model_uri=DATA_SHEETS_SCHEMA.dataset__variables, domain=None, range=Optional[Union[Union[dict, VariableMetadata], list[Union[dict, VariableMetadata]]]]) -slots.dataset__is_deidentified = Slot(uri=DATA_SHEETS_SCHEMA.is_deidentified, name="dataset__is_deidentified", curie=DATA_SHEETS_SCHEMA.curie('is_deidentified'), +slots.dataset__is_deidentified = Slot(uri=D4D.isDeidentified, name="dataset__is_deidentified", curie=D4D.curie('isDeidentified'), model_uri=DATA_SHEETS_SCHEMA.dataset__is_deidentified, domain=None, range=Optional[Union[dict, Deidentification]]) -slots.dataset__is_tabular = Slot(uri=DATA_SHEETS_SCHEMA.is_tabular, name="dataset__is_tabular", curie=DATA_SHEETS_SCHEMA.curie('is_tabular'), +slots.dataset__is_tabular = Slot(uri=SCHEMA.encodingFormat, 
name="dataset__is_tabular", curie=SCHEMA.curie('encodingFormat'), model_uri=DATA_SHEETS_SCHEMA.dataset__is_tabular, domain=None, range=Optional[Union[bool, Bool]]) slots.dataset__citation = Slot(uri=SCHEMA.citation, name="dataset__citation", curie=SCHEMA.curie('citation'), @@ -3672,7 +3971,7 @@ class slots: slots.dataset__parent_datasets = Slot(uri=SCHEMA.isPartOf, name="dataset__parent_datasets", curie=SCHEMA.curie('isPartOf'), model_uri=DATA_SHEETS_SCHEMA.dataset__parent_datasets, domain=None, range=Optional[Union[dict[Union[str, DatasetId], Union[dict, Dataset]], list[Union[dict, Dataset]]]]) -slots.dataset__related_datasets = Slot(uri=DATA_SHEETS_SCHEMA.related_datasets, name="dataset__related_datasets", curie=DATA_SHEETS_SCHEMA.curie('related_datasets'), +slots.dataset__related_datasets = Slot(uri=SCHEMA.isRelatedTo, name="dataset__related_datasets", curie=SCHEMA.curie('isRelatedTo'), model_uri=DATA_SHEETS_SCHEMA.dataset__related_datasets, domain=None, range=Optional[Union[Union[dict, DatasetRelationship], list[Union[dict, DatasetRelationship]]]]) slots.dataSubset__is_data_split = Slot(uri=DATA_SHEETS_SCHEMA.is_data_split, name="dataSubset__is_data_split", curie=DATA_SHEETS_SCHEMA.curie('is_data_split'), @@ -3699,7 +3998,7 @@ class slots: slots.datasetProperty__description = Slot(uri=SCHEMA.description, name="datasetProperty__description", curie=SCHEMA.curie('description'), model_uri=DATA_SHEETS_SCHEMA.datasetProperty__description, domain=None, range=Optional[str]) -slots.datasetProperty__used_software = Slot(uri=DATA_SHEETS_SCHEMA.used_software, name="datasetProperty__used_software", curie=DATA_SHEETS_SCHEMA.curie('used_software'), +slots.datasetProperty__used_software = Slot(uri=D4D.usedSoftware, name="datasetProperty__used_software", curie=D4D.curie('usedSoftware'), model_uri=DATA_SHEETS_SCHEMA.datasetProperty__used_software, domain=None, range=Optional[Union[dict[Union[str, SoftwareId], Union[dict, Software]], list[Union[dict, Software]]]]) 
slots.software__version = Slot(uri=SCHEMA.softwareVersion, name="software__version", curie=SCHEMA.curie('softwareVersion'), @@ -3751,7 +4050,7 @@ class slots: slots.creator__affiliations = Slot(uri=SCHEMA.affiliation, name="creator__affiliations", curie=SCHEMA.curie('affiliation'), model_uri=DATA_SHEETS_SCHEMA.creator__affiliations, domain=None, range=Optional[Union[dict[Union[str, OrganizationId], Union[dict, Organization]], list[Union[dict, Organization]]]]) -slots.creator__credit_roles = Slot(uri=D4DMOTIVATION.credit_roles, name="creator__credit_roles", curie=D4DMOTIVATION.curie('credit_roles'), +slots.creator__credit_roles = Slot(uri=D4D.creditRoles, name="creator__credit_roles", curie=D4D.curie('creditRoles'), model_uri=DATA_SHEETS_SCHEMA.creator__credit_roles, domain=None, range=Optional[Union[Union[str, "CRediTRoleEnum"], list[Union[str, "CRediTRoleEnum"]]]]) slots.fundingMechanism__grantor = Slot(uri=SCHEMA.funder, name="fundingMechanism__grantor", curie=SCHEMA.curie('funder'), @@ -3775,37 +4074,37 @@ class slots: slots.instance__counts = Slot(uri=SCHEMA.numberOfItems, name="instance__counts", curie=SCHEMA.curie('numberOfItems'), model_uri=DATA_SHEETS_SCHEMA.instance__counts, domain=None, range=Optional[int]) -slots.instance__label = Slot(uri=D4DCOMPOSITION.label, name="instance__label", curie=D4DCOMPOSITION.curie('label'), +slots.instance__label = Slot(uri=D4D.hasLabel, name="instance__label", curie=D4D.curie('hasLabel'), model_uri=DATA_SHEETS_SCHEMA.instance__label, domain=None, range=Optional[Union[bool, Bool]]) slots.instance__label_description = Slot(uri=SCHEMA.description, name="instance__label_description", curie=SCHEMA.curie('description'), model_uri=DATA_SHEETS_SCHEMA.instance__label_description, domain=None, range=Optional[str]) -slots.instance__sampling_strategies = Slot(uri=D4DCOMPOSITION.sampling_strategies, name="instance__sampling_strategies", curie=D4DCOMPOSITION.curie('sampling_strategies'), +slots.instance__sampling_strategies = 
Slot(uri=D4D.samplingStrategies, name="instance__sampling_strategies", curie=D4D.curie('samplingStrategies'), model_uri=DATA_SHEETS_SCHEMA.instance__sampling_strategies, domain=None, range=Optional[Union[Union[dict, SamplingStrategy], list[Union[dict, SamplingStrategy]]]]) -slots.instance__missing_information = Slot(uri=D4DCOMPOSITION.missing_information, name="instance__missing_information", curie=D4DCOMPOSITION.curie('missing_information'), +slots.instance__missing_information = Slot(uri=D4D.missingInformation, name="instance__missing_information", curie=D4D.curie('missingInformation'), model_uri=DATA_SHEETS_SCHEMA.instance__missing_information, domain=None, range=Optional[Union[Union[dict, MissingInfo], list[Union[dict, MissingInfo]]]]) -slots.samplingStrategy__is_sample = Slot(uri=D4DCOMPOSITION.is_sample, name="samplingStrategy__is_sample", curie=D4DCOMPOSITION.curie('is_sample'), +slots.samplingStrategy__is_sample = Slot(uri=D4D.isSample, name="samplingStrategy__is_sample", curie=D4D.curie('isSample'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__is_sample, domain=None, range=Optional[Union[Union[bool, Bool], list[Union[bool, Bool]]]]) -slots.samplingStrategy__is_random = Slot(uri=D4DCOMPOSITION.is_random, name="samplingStrategy__is_random", curie=D4DCOMPOSITION.curie('is_random'), +slots.samplingStrategy__is_random = Slot(uri=D4D.isRandom, name="samplingStrategy__is_random", curie=D4D.curie('isRandom'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__is_random, domain=None, range=Optional[Union[Union[bool, Bool], list[Union[bool, Bool]]]]) -slots.samplingStrategy__source_data = Slot(uri=D4DCOMPOSITION.source_data, name="samplingStrategy__source_data", curie=D4DCOMPOSITION.curie('source_data'), +slots.samplingStrategy__source_data = Slot(uri=D4D.sourceData, name="samplingStrategy__source_data", curie=D4D.curie('sourceData'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__source_data, domain=None, range=Optional[Union[str, list[str]]]) 
-slots.samplingStrategy__is_representative = Slot(uri=D4DCOMPOSITION.is_representative, name="samplingStrategy__is_representative", curie=D4DCOMPOSITION.curie('is_representative'), +slots.samplingStrategy__is_representative = Slot(uri=D4D.isRepresentative, name="samplingStrategy__is_representative", curie=D4D.curie('isRepresentative'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__is_representative, domain=None, range=Optional[Union[Union[bool, Bool], list[Union[bool, Bool]]]]) -slots.samplingStrategy__representative_verification = Slot(uri=D4DCOMPOSITION.representative_verification, name="samplingStrategy__representative_verification", curie=D4DCOMPOSITION.curie('representative_verification'), +slots.samplingStrategy__representative_verification = Slot(uri=SCHEMA.description, name="samplingStrategy__representative_verification", curie=SCHEMA.curie('description'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__representative_verification, domain=None, range=Optional[Union[str, list[str]]]) -slots.samplingStrategy__why_not_representative = Slot(uri=D4DCOMPOSITION.why_not_representative, name="samplingStrategy__why_not_representative", curie=D4DCOMPOSITION.curie('why_not_representative'), +slots.samplingStrategy__why_not_representative = Slot(uri=D4D.whyNotRepresentative, name="samplingStrategy__why_not_representative", curie=D4D.curie('whyNotRepresentative'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__why_not_representative, domain=None, range=Optional[Union[str, list[str]]]) -slots.samplingStrategy__strategies = Slot(uri=D4DCOMPOSITION.strategies, name="samplingStrategy__strategies", curie=D4DCOMPOSITION.curie('strategies'), +slots.samplingStrategy__strategies = Slot(uri=D4D.strategies, name="samplingStrategy__strategies", curie=D4D.curie('strategies'), model_uri=DATA_SHEETS_SCHEMA.samplingStrategy__strategies, domain=None, range=Optional[Union[str, list[str]]]) slots.missingInfo__missing = Slot(uri=DCTERMS.description, name="missingInfo__missing", 
curie=DCTERMS.curie('description'), @@ -3823,52 +4122,52 @@ class slots: slots.dataAnomaly__anomaly_details = Slot(uri=DCTERMS.description, name="dataAnomaly__anomaly_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.dataAnomaly__anomaly_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.datasetBias__bias_type = Slot(uri=D4DCOMPOSITION.bias_type, name="datasetBias__bias_type", curie=D4DCOMPOSITION.curie('bias_type'), +slots.datasetBias__bias_type = Slot(uri=D4D.biasType, name="datasetBias__bias_type", curie=D4D.curie('biasType'), model_uri=DATA_SHEETS_SCHEMA.datasetBias__bias_type, domain=None, range=Optional[Union[str, "BiasTypeEnum"]]) slots.datasetBias__bias_description = Slot(uri=DCTERMS.description, name="datasetBias__bias_description", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.datasetBias__bias_description, domain=None, range=Optional[str]) -slots.datasetBias__mitigation_strategy = Slot(uri=D4DCOMPOSITION.mitigation_strategy, name="datasetBias__mitigation_strategy", curie=D4DCOMPOSITION.curie('mitigation_strategy'), +slots.datasetBias__mitigation_strategy = Slot(uri=D4D.mitigation_strategy, name="datasetBias__mitigation_strategy", curie=D4D.curie('mitigation_strategy'), model_uri=DATA_SHEETS_SCHEMA.datasetBias__mitigation_strategy, domain=None, range=Optional[str]) -slots.datasetBias__affected_subsets = Slot(uri=D4DCOMPOSITION.affected_subsets, name="datasetBias__affected_subsets", curie=D4DCOMPOSITION.curie('affected_subsets'), +slots.datasetBias__affected_subsets = Slot(uri=D4D.affectedSubsets, name="datasetBias__affected_subsets", curie=D4D.curie('affectedSubsets'), model_uri=DATA_SHEETS_SCHEMA.datasetBias__affected_subsets, domain=None, range=Optional[Union[str, list[str]]]) -slots.datasetLimitation__limitation_type = Slot(uri=D4DCOMPOSITION.limitation_type, name="datasetLimitation__limitation_type", curie=D4DCOMPOSITION.curie('limitation_type'), +slots.datasetLimitation__limitation_type = 
Slot(uri=D4D.limitationType, name="datasetLimitation__limitation_type", curie=D4D.curie('limitationType'), model_uri=DATA_SHEETS_SCHEMA.datasetLimitation__limitation_type, domain=None, range=Optional[Union[str, "LimitationTypeEnum"]]) slots.datasetLimitation__limitation_description = Slot(uri=DCTERMS.description, name="datasetLimitation__limitation_description", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.datasetLimitation__limitation_description, domain=None, range=Optional[str]) -slots.datasetLimitation__scope_impact = Slot(uri=D4DCOMPOSITION.scope_impact, name="datasetLimitation__scope_impact", curie=D4DCOMPOSITION.curie('scope_impact'), +slots.datasetLimitation__scope_impact = Slot(uri=D4D.scopeImpact, name="datasetLimitation__scope_impact", curie=D4D.curie('scopeImpact'), model_uri=DATA_SHEETS_SCHEMA.datasetLimitation__scope_impact, domain=None, range=Optional[str]) -slots.datasetLimitation__recommended_mitigation = Slot(uri=D4DCOMPOSITION.recommended_mitigation, name="datasetLimitation__recommended_mitigation", curie=D4DCOMPOSITION.curie('recommended_mitigation'), +slots.datasetLimitation__recommended_mitigation = Slot(uri=D4D.recommendedMitigation, name="datasetLimitation__recommended_mitigation", curie=D4D.curie('recommendedMitigation'), model_uri=DATA_SHEETS_SCHEMA.datasetLimitation__recommended_mitigation, domain=None, range=Optional[str]) slots.externalResource__future_guarantees = Slot(uri=DCTERMS.description, name="externalResource__future_guarantees", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.externalResource__future_guarantees, domain=None, range=Optional[Union[str, list[str]]]) -slots.externalResource__archival = Slot(uri=D4DCOMPOSITION.archival, name="externalResource__archival", curie=D4DCOMPOSITION.curie('archival'), +slots.externalResource__archival = Slot(uri=SCHEMA.archivedAt, name="externalResource__archival", curie=SCHEMA.curie('archivedAt'), model_uri=DATA_SHEETS_SCHEMA.externalResource__archival, 
domain=None, range=Optional[Union[Union[bool, Bool], list[Union[bool, Bool]]]]) slots.externalResource__restrictions = Slot(uri=DCTERMS.accessRights, name="externalResource__restrictions", curie=DCTERMS.curie('accessRights'), model_uri=DATA_SHEETS_SCHEMA.externalResource__restrictions, domain=None, range=Optional[Union[str, list[str]]]) -slots.confidentiality__confidential_elements_present = Slot(uri=D4DCOMPOSITION.confidential_elements_present, name="confidentiality__confidential_elements_present", curie=D4DCOMPOSITION.curie('confidential_elements_present'), +slots.confidentiality__confidential_elements_present = Slot(uri=D4D.confidential_elements_present, name="confidentiality__confidential_elements_present", curie=D4D.curie('confidential_elements_present'), model_uri=DATA_SHEETS_SCHEMA.confidentiality__confidential_elements_present, domain=None, range=Optional[Union[bool, Bool]]) slots.confidentiality__confidentiality_details = Slot(uri=DCTERMS.description, name="confidentiality__confidentiality_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.confidentiality__confidentiality_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.contentWarning__content_warnings_present = Slot(uri=D4DCOMPOSITION.content_warnings_present, name="contentWarning__content_warnings_present", curie=D4DCOMPOSITION.curie('content_warnings_present'), +slots.contentWarning__content_warnings_present = Slot(uri=D4D.content_warnings_present, name="contentWarning__content_warnings_present", curie=D4D.curie('content_warnings_present'), model_uri=DATA_SHEETS_SCHEMA.contentWarning__content_warnings_present, domain=None, range=Optional[Union[bool, Bool]]) slots.contentWarning__warnings = Slot(uri=DCTERMS.description, name="contentWarning__warnings", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.contentWarning__warnings, domain=None, range=Optional[Union[str, list[str]]]) -slots.subpopulation__subpopulation_elements_present = 
Slot(uri=D4DCOMPOSITION.subpopulation_elements_present, name="subpopulation__subpopulation_elements_present", curie=D4DCOMPOSITION.curie('subpopulation_elements_present'), +slots.subpopulation__subpopulation_elements_present = Slot(uri=D4D.subpopulationElementsPresent, name="subpopulation__subpopulation_elements_present", curie=D4D.curie('subpopulationElementsPresent'), model_uri=DATA_SHEETS_SCHEMA.subpopulation__subpopulation_elements_present, domain=None, range=Optional[Union[bool, Bool]]) slots.subpopulation__identification = Slot(uri=DCTERMS.description, name="subpopulation__identification", curie=DCTERMS.curie('description'), @@ -3877,43 +4176,43 @@ class slots: slots.subpopulation__distribution = Slot(uri=DCTERMS.description, name="subpopulation__distribution", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.subpopulation__distribution, domain=None, range=Optional[Union[str, list[str]]]) -slots.deidentification__identifiable_elements_present = Slot(uri=D4DCOMPOSITION.identifiable_elements_present, name="deidentification__identifiable_elements_present", curie=D4DCOMPOSITION.curie('identifiable_elements_present'), +slots.deidentification__identifiable_elements_present = Slot(uri=D4D.identifiableElementsPresent, name="deidentification__identifiable_elements_present", curie=D4D.curie('identifiableElementsPresent'), model_uri=DATA_SHEETS_SCHEMA.deidentification__identifiable_elements_present, domain=None, range=Optional[Union[bool, Bool]]) slots.deidentification__method = Slot(uri=D4DCOMPOSITION.method, name="deidentification__method", curie=D4DCOMPOSITION.curie('method'), model_uri=DATA_SHEETS_SCHEMA.deidentification__method, domain=None, range=Optional[str]) -slots.deidentification__identifiers_removed = Slot(uri=D4DCOMPOSITION.identifiers_removed, name="deidentification__identifiers_removed", curie=D4DCOMPOSITION.curie('identifiers_removed'), +slots.deidentification__identifiers_removed = Slot(uri=SCHEMA.identifier, 
name="deidentification__identifiers_removed", curie=SCHEMA.curie('identifier'), model_uri=DATA_SHEETS_SCHEMA.deidentification__identifiers_removed, domain=None, range=Optional[Union[str, list[str]]]) slots.deidentification__deidentification_details = Slot(uri=DCTERMS.description, name="deidentification__deidentification_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.deidentification__deidentification_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.sensitiveElement__sensitive_elements_present = Slot(uri=D4DCOMPOSITION.sensitive_elements_present, name="sensitiveElement__sensitive_elements_present", curie=D4DCOMPOSITION.curie('sensitive_elements_present'), +slots.sensitiveElement__sensitive_elements_present = Slot(uri=D4D.sensitive_elements_present, name="sensitiveElement__sensitive_elements_present", curie=D4D.curie('sensitive_elements_present'), model_uri=DATA_SHEETS_SCHEMA.sensitiveElement__sensitive_elements_present, domain=None, range=Optional[Union[bool, Bool]]) slots.sensitiveElement__sensitivity_details = Slot(uri=DCTERMS.description, name="sensitiveElement__sensitivity_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.sensitiveElement__sensitivity_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.datasetRelationship__target_dataset = Slot(uri=D4DCOMPOSITION.target_dataset, name="datasetRelationship__target_dataset", curie=D4DCOMPOSITION.curie('target_dataset'), +slots.datasetRelationship__target_dataset = Slot(uri=SCHEMA.identifier, name="datasetRelationship__target_dataset", curie=SCHEMA.curie('identifier'), model_uri=DATA_SHEETS_SCHEMA.datasetRelationship__target_dataset, domain=None, range=str) -slots.datasetRelationship__relationship_type = Slot(uri=D4DCOMPOSITION.relationship_type, name="datasetRelationship__relationship_type", curie=D4DCOMPOSITION.curie('relationship_type'), +slots.datasetRelationship__relationship_type = Slot(uri=SCHEMA.additionalType, 
name="datasetRelationship__relationship_type", curie=SCHEMA.curie('additionalType'), model_uri=DATA_SHEETS_SCHEMA.datasetRelationship__relationship_type, domain=None, range=Union[str, "DatasetRelationshipTypeEnum"]) slots.datasetRelationship__description = Slot(uri=D4DCOMPOSITION.description, name="datasetRelationship__description", curie=D4DCOMPOSITION.curie('description'), model_uri=DATA_SHEETS_SCHEMA.datasetRelationship__description, domain=None, range=Optional[str]) -slots.instanceAcquisition__was_directly_observed = Slot(uri=DATA_SHEETS_SCHEMA['collection/was_directly_observed'], name="instanceAcquisition__was_directly_observed", curie=DATA_SHEETS_SCHEMA.curie('collection/was_directly_observed'), +slots.instanceAcquisition__was_directly_observed = Slot(uri=D4D.wasDirectlyObserved, name="instanceAcquisition__was_directly_observed", curie=D4D.curie('wasDirectlyObserved'), model_uri=DATA_SHEETS_SCHEMA.instanceAcquisition__was_directly_observed, domain=None, range=Optional[Union[bool, Bool]]) -slots.instanceAcquisition__was_reported_by_subjects = Slot(uri=DATA_SHEETS_SCHEMA['collection/was_reported_by_subjects'], name="instanceAcquisition__was_reported_by_subjects", curie=DATA_SHEETS_SCHEMA.curie('collection/was_reported_by_subjects'), +slots.instanceAcquisition__was_reported_by_subjects = Slot(uri=D4D.wasReportedBySubjects, name="instanceAcquisition__was_reported_by_subjects", curie=D4D.curie('wasReportedBySubjects'), model_uri=DATA_SHEETS_SCHEMA.instanceAcquisition__was_reported_by_subjects, domain=None, range=Optional[Union[bool, Bool]]) -slots.instanceAcquisition__was_inferred_derived = Slot(uri=DATA_SHEETS_SCHEMA['collection/was_inferred_derived'], name="instanceAcquisition__was_inferred_derived", curie=DATA_SHEETS_SCHEMA.curie('collection/was_inferred_derived'), +slots.instanceAcquisition__was_inferred_derived = Slot(uri=D4D.wasInferred, name="instanceAcquisition__was_inferred_derived", curie=D4D.curie('wasInferred'), 
model_uri=DATA_SHEETS_SCHEMA.instanceAcquisition__was_inferred_derived, domain=None, range=Optional[Union[bool, Bool]]) -slots.instanceAcquisition__was_validated_verified = Slot(uri=DATA_SHEETS_SCHEMA['collection/was_validated_verified'], name="instanceAcquisition__was_validated_verified", curie=DATA_SHEETS_SCHEMA.curie('collection/was_validated_verified'), +slots.instanceAcquisition__was_validated_verified = Slot(uri=D4D.wasValidated, name="instanceAcquisition__was_validated_verified", curie=D4D.curie('wasValidated'), model_uri=DATA_SHEETS_SCHEMA.instanceAcquisition__was_validated_verified, domain=None, range=Optional[Union[bool, Bool]]) slots.instanceAcquisition__acquisition_details = Slot(uri=DCTERMS.description, name="instanceAcquisition__acquisition_details", curie=DCTERMS.curie('description'), @@ -3922,46 +4221,46 @@ class slots: slots.collectionMechanism__mechanism_details = Slot(uri=DCTERMS.description, name="collectionMechanism__mechanism_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.collectionMechanism__mechanism_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.dataCollector__role = Slot(uri=DATA_SHEETS_SCHEMA['collection/role'], name="dataCollector__role", curie=DATA_SHEETS_SCHEMA.curie('collection/role'), +slots.dataCollector__role = Slot(uri=SCHEMA.roleName, name="dataCollector__role", curie=SCHEMA.curie('roleName'), model_uri=DATA_SHEETS_SCHEMA.dataCollector__role, domain=None, range=Optional[str]) slots.dataCollector__collector_details = Slot(uri=DCTERMS.description, name="dataCollector__collector_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.dataCollector__collector_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.collectionTimeframe__start_date = Slot(uri=DATA_SHEETS_SCHEMA['collection/start_date'], name="collectionTimeframe__start_date", curie=DATA_SHEETS_SCHEMA.curie('collection/start_date'), +slots.collectionTimeframe__start_date = 
Slot(uri=SCHEMA.startDate, name="collectionTimeframe__start_date", curie=SCHEMA.curie('startDate'), model_uri=DATA_SHEETS_SCHEMA.collectionTimeframe__start_date, domain=None, range=Optional[Union[str, XSDDate]]) -slots.collectionTimeframe__end_date = Slot(uri=DATA_SHEETS_SCHEMA['collection/end_date'], name="collectionTimeframe__end_date", curie=DATA_SHEETS_SCHEMA.curie('collection/end_date'), +slots.collectionTimeframe__end_date = Slot(uri=SCHEMA.endDate, name="collectionTimeframe__end_date", curie=SCHEMA.curie('endDate'), model_uri=DATA_SHEETS_SCHEMA.collectionTimeframe__end_date, domain=None, range=Optional[Union[str, XSDDate]]) slots.collectionTimeframe__timeframe_details = Slot(uri=DCTERMS.description, name="collectionTimeframe__timeframe_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.collectionTimeframe__timeframe_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.directCollection__is_direct = Slot(uri=DATA_SHEETS_SCHEMA['collection/is_direct'], name="directCollection__is_direct", curie=DATA_SHEETS_SCHEMA.curie('collection/is_direct'), +slots.directCollection__is_direct = Slot(uri=D4D.isDirect, name="directCollection__is_direct", curie=D4D.curie('isDirect'), model_uri=DATA_SHEETS_SCHEMA.directCollection__is_direct, domain=None, range=Optional[Union[bool, Bool]]) slots.directCollection__collection_details = Slot(uri=DCTERMS.description, name="directCollection__collection_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.directCollection__collection_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.missingDataDocumentation__missing_data_patterns = Slot(uri=DATA_SHEETS_SCHEMA['collection/missing_data_patterns'], name="missingDataDocumentation__missing_data_patterns", curie=DATA_SHEETS_SCHEMA.curie('collection/missing_data_patterns'), +slots.missingDataDocumentation__missing_data_patterns = Slot(uri=D4D.missingDataPatterns, 
name="missingDataDocumentation__missing_data_patterns", curie=D4D.curie('missingDataPatterns'), model_uri=DATA_SHEETS_SCHEMA.missingDataDocumentation__missing_data_patterns, domain=None, range=Optional[Union[str, list[str]]]) -slots.missingDataDocumentation__missing_data_causes = Slot(uri=DATA_SHEETS_SCHEMA['collection/missing_data_causes'], name="missingDataDocumentation__missing_data_causes", curie=DATA_SHEETS_SCHEMA.curie('collection/missing_data_causes'), +slots.missingDataDocumentation__missing_data_causes = Slot(uri=D4D.missingDataCauses, name="missingDataDocumentation__missing_data_causes", curie=D4D.curie('missingDataCauses'), model_uri=DATA_SHEETS_SCHEMA.missingDataDocumentation__missing_data_causes, domain=None, range=Optional[Union[str, list[str]]]) -slots.missingDataDocumentation__handling_strategy = Slot(uri=DATA_SHEETS_SCHEMA['collection/handling_strategy'], name="missingDataDocumentation__handling_strategy", curie=DATA_SHEETS_SCHEMA.curie('collection/handling_strategy'), +slots.missingDataDocumentation__handling_strategy = Slot(uri=D4D.handlingStrategy, name="missingDataDocumentation__handling_strategy", curie=D4D.curie('handlingStrategy'), model_uri=DATA_SHEETS_SCHEMA.missingDataDocumentation__handling_strategy, domain=None, range=Optional[str]) -slots.rawDataSource__source_description = Slot(uri=DATA_SHEETS_SCHEMA['collection/source_description'], name="rawDataSource__source_description", curie=DATA_SHEETS_SCHEMA.curie('collection/source_description'), +slots.rawDataSource__source_description = Slot(uri=DCTERMS.description, name="rawDataSource__source_description", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.rawDataSource__source_description, domain=None, range=str) -slots.rawDataSource__source_type = Slot(uri=DATA_SHEETS_SCHEMA['collection/source_type'], name="rawDataSource__source_type", curie=DATA_SHEETS_SCHEMA.curie('collection/source_type'), +slots.rawDataSource__source_type = Slot(uri=DCTERMS.type, 
name="rawDataSource__source_type", curie=DCTERMS.curie('type'), model_uri=DATA_SHEETS_SCHEMA.rawDataSource__source_type, domain=None, range=Optional[Union[str, list[str]]]) -slots.rawDataSource__access_details = Slot(uri=DATA_SHEETS_SCHEMA['collection/access_details'], name="rawDataSource__access_details", curie=DATA_SHEETS_SCHEMA.curie('collection/access_details'), +slots.rawDataSource__access_details = Slot(uri=D4D.accessDetails, name="rawDataSource__access_details", curie=D4D.curie('accessDetails'), model_uri=DATA_SHEETS_SCHEMA.rawDataSource__access_details, domain=None, range=Optional[str]) -slots.rawDataSource__raw_data_format = Slot(uri=DATA_SHEETS_SCHEMA['collection/raw_data_format'], name="rawDataSource__raw_data_format", curie=DATA_SHEETS_SCHEMA.curie('collection/raw_data_format'), +slots.rawDataSource__raw_data_format = Slot(uri=D4D.rawDataFormat, name="rawDataSource__raw_data_format", curie=D4D.curie('rawDataFormat'), model_uri=DATA_SHEETS_SCHEMA.rawDataSource__raw_data_format, domain=None, range=Optional[Union[str, list[str]]]) slots.preprocessingStrategy__preprocessing_details = Slot(uri=DCTERMS.description, name="preprocessingStrategy__preprocessing_details", curie=DCTERMS.curie('description'), @@ -3973,64 +4272,64 @@ class slots: slots.labelingStrategy__data_annotation_platform = Slot(uri=SCHEMA.instrument, name="labelingStrategy__data_annotation_platform", curie=SCHEMA.curie('instrument'), model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__data_annotation_platform, domain=None, range=Optional[str]) -slots.labelingStrategy__data_annotation_protocol = Slot(uri=D4DPREPROCESSING.data_annotation_protocol, name="labelingStrategy__data_annotation_protocol", curie=D4DPREPROCESSING.curie('data_annotation_protocol'), +slots.labelingStrategy__data_annotation_protocol = Slot(uri=D4D.dataAnnotationProtocol, name="labelingStrategy__data_annotation_protocol", curie=D4D.curie('dataAnnotationProtocol'), 
model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__data_annotation_protocol, domain=None, range=Optional[Union[str, list[str]]]) -slots.labelingStrategy__annotations_per_item = Slot(uri=D4DPREPROCESSING.annotations_per_item, name="labelingStrategy__annotations_per_item", curie=D4DPREPROCESSING.curie('annotations_per_item'), +slots.labelingStrategy__annotations_per_item = Slot(uri=D4D.annotationsPerItem, name="labelingStrategy__annotations_per_item", curie=D4D.curie('annotationsPerItem'), model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__annotations_per_item, domain=None, range=Optional[int]) slots.labelingStrategy__inter_annotator_agreement = Slot(uri=SCHEMA.measurementMethod, name="labelingStrategy__inter_annotator_agreement", curie=SCHEMA.curie('measurementMethod'), model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__inter_annotator_agreement, domain=None, range=Optional[str]) -slots.labelingStrategy__annotator_demographics = Slot(uri=D4DPREPROCESSING.annotator_demographics, name="labelingStrategy__annotator_demographics", curie=D4DPREPROCESSING.curie('annotator_demographics'), +slots.labelingStrategy__annotator_demographics = Slot(uri=D4D.annotatorDemographics, name="labelingStrategy__annotator_demographics", curie=D4D.curie('annotatorDemographics'), model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__annotator_demographics, domain=None, range=Optional[Union[str, list[str]]]) slots.labelingStrategy__labeling_details = Slot(uri=DCTERMS.description, name="labelingStrategy__labeling_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.labelingStrategy__labeling_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.rawData__access_url = Slot(uri=D4DPREPROCESSING.access_url, name="rawData__access_url", curie=D4DPREPROCESSING.curie('access_url'), +slots.rawData__access_url = Slot(uri=DCAT.accessURL, name="rawData__access_url", curie=DCAT.curie('accessURL'), model_uri=DATA_SHEETS_SCHEMA.rawData__access_url, domain=None, range=Optional[Union[str, 
URI]]) slots.rawData__raw_data_details = Slot(uri=DCTERMS.description, name="rawData__raw_data_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.rawData__raw_data_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.imputationProtocol__imputation_method = Slot(uri=D4DPREPROCESSING.imputation_method, name="imputationProtocol__imputation_method", curie=D4DPREPROCESSING.curie('imputation_method'), +slots.imputationProtocol__imputation_method = Slot(uri=D4D.imputation_method, name="imputationProtocol__imputation_method", curie=D4D.curie('imputation_method'), model_uri=DATA_SHEETS_SCHEMA.imputationProtocol__imputation_method, domain=None, range=Optional[Union[str, list[str]]]) -slots.imputationProtocol__imputed_fields = Slot(uri=D4DPREPROCESSING.imputed_fields, name="imputationProtocol__imputed_fields", curie=D4DPREPROCESSING.curie('imputed_fields'), +slots.imputationProtocol__imputed_fields = Slot(uri=D4D.imputed_fields, name="imputationProtocol__imputed_fields", curie=D4D.curie('imputed_fields'), model_uri=DATA_SHEETS_SCHEMA.imputationProtocol__imputed_fields, domain=None, range=Optional[Union[str, list[str]]]) -slots.imputationProtocol__imputation_rationale = Slot(uri=D4DPREPROCESSING.imputation_rationale, name="imputationProtocol__imputation_rationale", curie=D4DPREPROCESSING.curie('imputation_rationale'), +slots.imputationProtocol__imputation_rationale = Slot(uri=D4D.imputation_rationale, name="imputationProtocol__imputation_rationale", curie=D4D.curie('imputation_rationale'), model_uri=DATA_SHEETS_SCHEMA.imputationProtocol__imputation_rationale, domain=None, range=Optional[str]) -slots.imputationProtocol__imputation_validation = Slot(uri=D4DPREPROCESSING.imputation_validation, name="imputationProtocol__imputation_validation", curie=D4DPREPROCESSING.curie('imputation_validation'), +slots.imputationProtocol__imputation_validation = Slot(uri=D4D.imputation_validation, name="imputationProtocol__imputation_validation", 
curie=D4D.curie('imputation_validation'), model_uri=DATA_SHEETS_SCHEMA.imputationProtocol__imputation_validation, domain=None, range=Optional[Union[str, list[str]]]) -slots.annotationAnalysis__inter_annotator_agreement_score = Slot(uri=D4DPREPROCESSING.inter_annotator_agreement_score, name="annotationAnalysis__inter_annotator_agreement_score", curie=D4DPREPROCESSING.curie('inter_annotator_agreement_score'), +slots.annotationAnalysis__inter_annotator_agreement_score = Slot(uri=D4D.interAnnotatorAgreementScore, name="annotationAnalysis__inter_annotator_agreement_score", curie=D4D.curie('interAnnotatorAgreementScore'), model_uri=DATA_SHEETS_SCHEMA.annotationAnalysis__inter_annotator_agreement_score, domain=None, range=Optional[float]) -slots.annotationAnalysis__agreement_metric = Slot(uri=D4DPREPROCESSING.agreement_metric, name="annotationAnalysis__agreement_metric", curie=D4DPREPROCESSING.curie('agreement_metric'), +slots.annotationAnalysis__agreement_metric = Slot(uri=D4D.agreementMetric, name="annotationAnalysis__agreement_metric", curie=D4D.curie('agreementMetric'), model_uri=DATA_SHEETS_SCHEMA.annotationAnalysis__agreement_metric, domain=None, range=Optional[str]) -slots.annotationAnalysis__analysis_method = Slot(uri=D4DPREPROCESSING.analysis_method, name="annotationAnalysis__analysis_method", curie=D4DPREPROCESSING.curie('analysis_method'), +slots.annotationAnalysis__analysis_method = Slot(uri=D4D.analysisMethod, name="annotationAnalysis__analysis_method", curie=D4D.curie('analysisMethod'), model_uri=DATA_SHEETS_SCHEMA.annotationAnalysis__analysis_method, domain=None, range=Optional[str]) -slots.annotationAnalysis__disagreement_patterns = Slot(uri=D4DPREPROCESSING.disagreement_patterns, name="annotationAnalysis__disagreement_patterns", curie=D4DPREPROCESSING.curie('disagreement_patterns'), +slots.annotationAnalysis__disagreement_patterns = Slot(uri=D4D.disagreementPatterns, name="annotationAnalysis__disagreement_patterns", 
curie=D4D.curie('disagreementPatterns'), model_uri=DATA_SHEETS_SCHEMA.annotationAnalysis__disagreement_patterns, domain=None, range=Optional[Union[str, list[str]]]) -slots.annotationAnalysis__annotation_quality_details = Slot(uri=D4DPREPROCESSING.annotation_quality_details, name="annotationAnalysis__annotation_quality_details", curie=D4DPREPROCESSING.curie('annotation_quality_details'), +slots.annotationAnalysis__annotation_quality_details = Slot(uri=D4D.annotationQualityDetails, name="annotationAnalysis__annotation_quality_details", curie=D4D.curie('annotationQualityDetails'), model_uri=DATA_SHEETS_SCHEMA.annotationAnalysis__annotation_quality_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.machineAnnotationTools__tools = Slot(uri=D4DPREPROCESSING.tools, name="machineAnnotationTools__tools", curie=D4DPREPROCESSING.curie('tools'), +slots.machineAnnotationTools__tools = Slot(uri=SCHEMA.name, name="machineAnnotationTools__tools", curie=SCHEMA.curie('name'), model_uri=DATA_SHEETS_SCHEMA.machineAnnotationTools__tools, domain=None, range=Optional[Union[str, list[str]]]) -slots.machineAnnotationTools__tool_descriptions = Slot(uri=D4DPREPROCESSING.tool_descriptions, name="machineAnnotationTools__tool_descriptions", curie=D4DPREPROCESSING.curie('tool_descriptions'), +slots.machineAnnotationTools__tool_descriptions = Slot(uri=D4D.toolDescriptions, name="machineAnnotationTools__tool_descriptions", curie=D4D.curie('toolDescriptions'), model_uri=DATA_SHEETS_SCHEMA.machineAnnotationTools__tool_descriptions, domain=None, range=Optional[Union[str, list[str]]]) -slots.machineAnnotationTools__tool_accuracy = Slot(uri=D4DPREPROCESSING.tool_accuracy, name="machineAnnotationTools__tool_accuracy", curie=D4DPREPROCESSING.curie('tool_accuracy'), +slots.machineAnnotationTools__tool_accuracy = Slot(uri=D4D.toolAccuracy, name="machineAnnotationTools__tool_accuracy", curie=D4D.curie('toolAccuracy'), model_uri=DATA_SHEETS_SCHEMA.machineAnnotationTools__tool_accuracy, 
domain=None, range=Optional[Union[str, list[str]]]) -slots.existingUse__examples = Slot(uri=D4DUSES.examples, name="existingUse__examples", curie=D4DUSES.curie('examples'), +slots.existingUse__examples = Slot(uri=SCHEMA.example, name="existingUse__examples", curie=SCHEMA.curie('example'), model_uri=DATA_SHEETS_SCHEMA.existingUse__examples, domain=None, range=Optional[Union[str, list[str]]]) slots.useRepository__repository_url = Slot(uri=D4DUSES.repository_url, name="useRepository__repository_url", curie=D4DUSES.curie('repository_url'), @@ -4054,10 +4353,10 @@ class slots: slots.intendedUse__usage_notes = Slot(uri=D4DUSES.usage_notes, name="intendedUse__usage_notes", curie=D4DUSES.curie('usage_notes'), model_uri=DATA_SHEETS_SCHEMA.intendedUse__usage_notes, domain=None, range=Optional[str]) -slots.intendedUse__use_category = Slot(uri=D4DUSES.use_category, name="intendedUse__use_category", curie=D4DUSES.curie('use_category'), +slots.intendedUse__use_category = Slot(uri=D4D.useCategory, name="intendedUse__use_category", curie=D4D.curie('useCategory'), model_uri=DATA_SHEETS_SCHEMA.intendedUse__use_category, domain=None, range=Optional[Union[str, list[str]]]) -slots.prohibitedUse__prohibition_reason = Slot(uri=D4DUSES.prohibition_reason, name="prohibitedUse__prohibition_reason", curie=D4DUSES.curie('prohibition_reason'), +slots.prohibitedUse__prohibition_reason = Slot(uri=D4D.prohibitionReason, name="prohibitedUse__prohibition_reason", curie=D4D.curie('prohibitionReason'), model_uri=DATA_SHEETS_SCHEMA.prohibitedUse__prohibition_reason, domain=None, range=Optional[Union[str, list[str]]]) slots.thirdPartySharing__is_shared = Slot(uri=DCTERMS.accessRights, name="thirdPartySharing__is_shared", curie=DCTERMS.curie('accessRights'), @@ -4075,34 +4374,34 @@ class slots: slots.maintainer__maintainer_details = Slot(uri=DCTERMS.description, name="maintainer__maintainer_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.maintainer__maintainer_details, 
domain=None, range=Optional[Union[str, list[str]]]) -slots.erratum__erratum_url = Slot(uri=D4DMAINTENANCE.erratum_url, name="erratum__erratum_url", curie=D4DMAINTENANCE.curie('erratum_url'), +slots.erratum__erratum_url = Slot(uri=DCAT.accessURL, name="erratum__erratum_url", curie=DCAT.curie('accessURL'), model_uri=DATA_SHEETS_SCHEMA.erratum__erratum_url, domain=None, range=Optional[Union[str, URI]]) slots.erratum__erratum_details = Slot(uri=DCTERMS.description, name="erratum__erratum_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.erratum__erratum_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.updatePlan__frequency = Slot(uri=D4DMAINTENANCE.frequency, name="updatePlan__frequency", curie=D4DMAINTENANCE.curie('frequency'), +slots.updatePlan__frequency = Slot(uri=D4D.frequency, name="updatePlan__frequency", curie=D4D.curie('frequency'), model_uri=DATA_SHEETS_SCHEMA.updatePlan__frequency, domain=None, range=Optional[str]) slots.updatePlan__update_details = Slot(uri=DCTERMS.description, name="updatePlan__update_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.updatePlan__update_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.retentionLimits__retention_period = Slot(uri=D4DMAINTENANCE.retention_period, name="retentionLimits__retention_period", curie=D4DMAINTENANCE.curie('retention_period'), +slots.retentionLimits__retention_period = Slot(uri=D4D.retentionPeriod, name="retentionLimits__retention_period", curie=D4D.curie('retentionPeriod'), model_uri=DATA_SHEETS_SCHEMA.retentionLimits__retention_period, domain=None, range=Optional[str]) slots.retentionLimits__retention_details = Slot(uri=DCTERMS.description, name="retentionLimits__retention_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.retentionLimits__retention_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.versionAccess__latest_version_doi = 
Slot(uri=D4DMAINTENANCE.latest_version_doi, name="versionAccess__latest_version_doi", curie=D4DMAINTENANCE.curie('latest_version_doi'), +slots.versionAccess__latest_version_doi = Slot(uri=SCHEMA.identifier, name="versionAccess__latest_version_doi", curie=SCHEMA.curie('identifier'), model_uri=DATA_SHEETS_SCHEMA.versionAccess__latest_version_doi, domain=None, range=Optional[str]) -slots.versionAccess__versions_available = Slot(uri=D4DMAINTENANCE.versions_available, name="versionAccess__versions_available", curie=D4DMAINTENANCE.curie('versions_available'), +slots.versionAccess__versions_available = Slot(uri=D4D.versionsAvailable, name="versionAccess__versions_available", curie=D4D.curie('versionsAvailable'), model_uri=DATA_SHEETS_SCHEMA.versionAccess__versions_available, domain=None, range=Optional[Union[str, list[str]]]) slots.versionAccess__version_details = Slot(uri=DCTERMS.description, name="versionAccess__version_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.versionAccess__version_details, domain=None, range=Optional[Union[str, list[str]]]) -slots.extensionMechanism__contribution_url = Slot(uri=D4DMAINTENANCE.contribution_url, name="extensionMechanism__contribution_url", curie=D4DMAINTENANCE.curie('contribution_url'), +slots.extensionMechanism__contribution_url = Slot(uri=DCAT.landingPage, name="extensionMechanism__contribution_url", curie=DCAT.curie('landingPage'), model_uri=DATA_SHEETS_SCHEMA.extensionMechanism__contribution_url, domain=None, range=Optional[Union[str, URI]]) slots.extensionMechanism__extension_details = Slot(uri=DCTERMS.description, name="extensionMechanism__extension_details", curie=DCTERMS.curie('description'), @@ -4129,47 +4428,71 @@ class slots: slots.consentRevocation__revocation_details = Slot(uri=DCTERMS.description, name="consentRevocation__revocation_details", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.consentRevocation__revocation_details, domain=None, range=Optional[Union[str, 
list[str]]]) -slots.humanSubjectResearch__involves_human_subjects = Slot(uri=D4DHUMAN.involves_human_subjects, name="humanSubjectResearch__involves_human_subjects", curie=D4DHUMAN.curie('involves_human_subjects'), +slots.humanSubjectResearch__involves_human_subjects = Slot(uri=D4D.involvesHumanSubjects, name="humanSubjectResearch__involves_human_subjects", curie=D4D.curie('involvesHumanSubjects'), model_uri=DATA_SHEETS_SCHEMA.humanSubjectResearch__involves_human_subjects, domain=None, range=Optional[Union[bool, Bool]]) -slots.humanSubjectResearch__irb_approval = Slot(uri=D4DHUMAN.irb_approval, name="humanSubjectResearch__irb_approval", curie=D4DHUMAN.curie('irb_approval'), +slots.humanSubjectResearch__irb_approval = Slot(uri=D4D.irbApproval, name="humanSubjectResearch__irb_approval", curie=D4D.curie('irbApproval'), model_uri=DATA_SHEETS_SCHEMA.humanSubjectResearch__irb_approval, domain=None, range=Optional[Union[str, list[str]]]) -slots.humanSubjectResearch__ethics_review_board = Slot(uri=D4DHUMAN.ethics_review_board, name="humanSubjectResearch__ethics_review_board", curie=D4DHUMAN.curie('ethics_review_board'), +slots.humanSubjectResearch__ethics_review_board = Slot(uri=D4D.ethicsReviewBoard, name="humanSubjectResearch__ethics_review_board", curie=D4D.curie('ethicsReviewBoard'), model_uri=DATA_SHEETS_SCHEMA.humanSubjectResearch__ethics_review_board, domain=None, range=Optional[Union[str, list[str]]]) -slots.humanSubjectResearch__special_populations = Slot(uri=D4DHUMAN.special_populations, name="humanSubjectResearch__special_populations", curie=D4DHUMAN.curie('special_populations'), +slots.humanSubjectResearch__special_populations = Slot(uri=D4D.specialPopulations, name="humanSubjectResearch__special_populations", curie=D4D.curie('specialPopulations'), model_uri=DATA_SHEETS_SCHEMA.humanSubjectResearch__special_populations, domain=None, range=Optional[Union[str, list[str]]]) -slots.humanSubjectResearch__regulatory_compliance = Slot(uri=D4DHUMAN.regulatory_compliance, 
name="humanSubjectResearch__regulatory_compliance", curie=D4DHUMAN.curie('regulatory_compliance'), +slots.humanSubjectResearch__regulatory_compliance = Slot(uri=D4D.regulatoryCompliance, name="humanSubjectResearch__regulatory_compliance", curie=D4D.curie('regulatoryCompliance'), model_uri=DATA_SHEETS_SCHEMA.humanSubjectResearch__regulatory_compliance, domain=None, range=Optional[Union[str, list[str]]]) -slots.informedConsent__consent_obtained = Slot(uri=D4DHUMAN.consent_obtained, name="informedConsent__consent_obtained", curie=D4DHUMAN.curie('consent_obtained'), +slots.informedConsent__consent_obtained = Slot(uri=D4D.consentObtained, name="informedConsent__consent_obtained", curie=D4D.curie('consentObtained'), model_uri=DATA_SHEETS_SCHEMA.informedConsent__consent_obtained, domain=None, range=Optional[Union[bool, Bool]]) -slots.informedConsent__consent_type = Slot(uri=D4DHUMAN.consent_type, name="informedConsent__consent_type", curie=D4DHUMAN.curie('consent_type'), +slots.informedConsent__consent_type = Slot(uri=D4D.consentType, name="informedConsent__consent_type", curie=D4D.curie('consentType'), model_uri=DATA_SHEETS_SCHEMA.informedConsent__consent_type, domain=None, range=Optional[Union[str, list[str]]]) -slots.informedConsent__consent_documentation = Slot(uri=D4DHUMAN.consent_documentation, name="informedConsent__consent_documentation", curie=D4DHUMAN.curie('consent_documentation'), +slots.informedConsent__consent_documentation = Slot(uri=D4D.consentDocumentation, name="informedConsent__consent_documentation", curie=D4D.curie('consentDocumentation'), model_uri=DATA_SHEETS_SCHEMA.informedConsent__consent_documentation, domain=None, range=Optional[Union[str, list[str]]]) -slots.informedConsent__withdrawal_mechanism = Slot(uri=D4DHUMAN.withdrawal_mechanism, name="informedConsent__withdrawal_mechanism", curie=D4DHUMAN.curie('withdrawal_mechanism'), +slots.informedConsent__withdrawal_mechanism = Slot(uri=D4D.withdrawalMechanism, 
name="informedConsent__withdrawal_mechanism", curie=D4D.curie('withdrawalMechanism'), model_uri=DATA_SHEETS_SCHEMA.informedConsent__withdrawal_mechanism, domain=None, range=Optional[Union[str, list[str]]]) -slots.informedConsent__consent_scope = Slot(uri=D4DHUMAN.consent_scope, name="informedConsent__consent_scope", curie=D4DHUMAN.curie('consent_scope'), +slots.informedConsent__consent_scope = Slot(uri=D4D.consentScope, name="informedConsent__consent_scope", curie=D4D.curie('consentScope'), model_uri=DATA_SHEETS_SCHEMA.informedConsent__consent_scope, domain=None, range=Optional[Union[str, list[str]]]) -slots.vulnerablePopulations__vulnerable_groups_included = Slot(uri=D4DHUMAN.vulnerable_groups_included, name="vulnerablePopulations__vulnerable_groups_included", curie=D4DHUMAN.curie('vulnerable_groups_included'), - model_uri=DATA_SHEETS_SCHEMA.vulnerablePopulations__vulnerable_groups_included, domain=None, range=Optional[Union[bool, Bool]]) +slots.participantPrivacy__anonymization_method = Slot(uri=D4D.anonymizationMethod, name="participantPrivacy__anonymization_method", curie=D4D.curie('anonymizationMethod'), + model_uri=DATA_SHEETS_SCHEMA.participantPrivacy__anonymization_method, domain=None, range=Optional[Union[str, list[str]]]) + +slots.participantPrivacy__reidentification_risk = Slot(uri=D4D.reidentificationRisk, name="participantPrivacy__reidentification_risk", curie=D4D.curie('reidentificationRisk'), + model_uri=DATA_SHEETS_SCHEMA.participantPrivacy__reidentification_risk, domain=None, range=Optional[Union[str, list[str]]]) + +slots.participantPrivacy__privacy_techniques = Slot(uri=D4D.privacyTechniques, name="participantPrivacy__privacy_techniques", curie=D4D.curie('privacyTechniques'), + model_uri=DATA_SHEETS_SCHEMA.participantPrivacy__privacy_techniques, domain=None, range=Optional[Union[str, list[str]]]) + +slots.participantPrivacy__data_linkage = Slot(uri=D4D.dataLinkage, name="participantPrivacy__data_linkage", curie=D4D.curie('dataLinkage'), + 
model_uri=DATA_SHEETS_SCHEMA.participantPrivacy__data_linkage, domain=None, range=Optional[Union[str, list[str]]]) + +slots.humanSubjectCompensation__compensation_provided = Slot(uri=D4D.compensationProvided, name="humanSubjectCompensation__compensation_provided", curie=D4D.curie('compensationProvided'), + model_uri=DATA_SHEETS_SCHEMA.humanSubjectCompensation__compensation_provided, domain=None, range=Optional[Union[bool, Bool]]) -slots.vulnerablePopulations__special_protections = Slot(uri=D4DHUMAN.special_protections, name="vulnerablePopulations__special_protections", curie=D4DHUMAN.curie('special_protections'), - model_uri=DATA_SHEETS_SCHEMA.vulnerablePopulations__special_protections, domain=None, range=Optional[Union[str, list[str]]]) +slots.humanSubjectCompensation__compensation_type = Slot(uri=D4D.compensationType, name="humanSubjectCompensation__compensation_type", curie=D4D.curie('compensationType'), + model_uri=DATA_SHEETS_SCHEMA.humanSubjectCompensation__compensation_type, domain=None, range=Optional[Union[str, list[str]]]) -slots.vulnerablePopulations__assent_procedures = Slot(uri=D4DHUMAN.assent_procedures, name="vulnerablePopulations__assent_procedures", curie=D4DHUMAN.curie('assent_procedures'), - model_uri=DATA_SHEETS_SCHEMA.vulnerablePopulations__assent_procedures, domain=None, range=Optional[Union[str, list[str]]]) +slots.humanSubjectCompensation__compensation_amount = Slot(uri=D4D.compensationAmount, name="humanSubjectCompensation__compensation_amount", curie=D4D.curie('compensationAmount'), + model_uri=DATA_SHEETS_SCHEMA.humanSubjectCompensation__compensation_amount, domain=None, range=Optional[Union[str, list[str]]]) -slots.vulnerablePopulations__guardian_consent = Slot(uri=D4DHUMAN.guardian_consent, name="vulnerablePopulations__guardian_consent", curie=D4DHUMAN.curie('guardian_consent'), - model_uri=DATA_SHEETS_SCHEMA.vulnerablePopulations__guardian_consent, domain=None, range=Optional[Union[str, list[str]]]) 
+slots.humanSubjectCompensation__compensation_rationale = Slot(uri=D4D.compensationRationale, name="humanSubjectCompensation__compensation_rationale", curie=D4D.curie('compensationRationale'), + model_uri=DATA_SHEETS_SCHEMA.humanSubjectCompensation__compensation_rationale, domain=None, range=Optional[Union[str, list[str]]]) + +slots.atRiskPopulations__at_risk_groups_included = Slot(uri=D4D.atRiskGroupsIncluded, name="atRiskPopulations__at_risk_groups_included", curie=D4D.curie('atRiskGroupsIncluded'), + model_uri=DATA_SHEETS_SCHEMA.atRiskPopulations__at_risk_groups_included, domain=None, range=Optional[Union[bool, Bool]]) + +slots.atRiskPopulations__special_protections = Slot(uri=D4D.specialProtections, name="atRiskPopulations__special_protections", curie=D4D.curie('specialProtections'), + model_uri=DATA_SHEETS_SCHEMA.atRiskPopulations__special_protections, domain=None, range=Optional[Union[str, list[str]]]) + +slots.atRiskPopulations__assent_procedures = Slot(uri=D4D.assentProcedures, name="atRiskPopulations__assent_procedures", curie=D4D.curie('assentProcedures'), + model_uri=DATA_SHEETS_SCHEMA.atRiskPopulations__assent_procedures, domain=None, range=Optional[Union[str, list[str]]]) + +slots.atRiskPopulations__guardian_consent = Slot(uri=D4D.guardianConsent, name="atRiskPopulations__guardian_consent", curie=D4D.curie('guardianConsent'), + model_uri=DATA_SHEETS_SCHEMA.atRiskPopulations__guardian_consent, domain=None, range=Optional[Union[str, list[str]]]) slots.licenseAndUseTerms__license_terms = Slot(uri=DCTERMS.license, name="licenseAndUseTerms__license_terms", curie=DCTERMS.curie('license'), model_uri=DATA_SHEETS_SCHEMA.licenseAndUseTerms__license_terms, domain=None, range=Optional[Union[str, list[str]]]) @@ -4186,13 +4509,13 @@ class slots: slots.exportControlRegulatoryRestrictions__regulatory_restrictions = Slot(uri=DCTERMS.accessRights, name="exportControlRegulatoryRestrictions__regulatory_restrictions", curie=DCTERMS.curie('accessRights'), 
model_uri=DATA_SHEETS_SCHEMA.exportControlRegulatoryRestrictions__regulatory_restrictions, domain=None, range=Optional[Union[str, list[str]]]) -slots.exportControlRegulatoryRestrictions__hipaa_compliant = Slot(uri=D4DDATAGOVERNANCE.hipaa_compliant, name="exportControlRegulatoryRestrictions__hipaa_compliant", curie=D4DDATAGOVERNANCE.curie('hipaa_compliant'), +slots.exportControlRegulatoryRestrictions__hipaa_compliant = Slot(uri=D4D.hipaaCompliant, name="exportControlRegulatoryRestrictions__hipaa_compliant", curie=D4D.curie('hipaaCompliant'), model_uri=DATA_SHEETS_SCHEMA.exportControlRegulatoryRestrictions__hipaa_compliant, domain=None, range=Optional[Union[str, "ComplianceStatusEnum"]]) -slots.exportControlRegulatoryRestrictions__other_compliance = Slot(uri=D4DDATAGOVERNANCE.other_compliance, name="exportControlRegulatoryRestrictions__other_compliance", curie=D4DDATAGOVERNANCE.curie('other_compliance'), +slots.exportControlRegulatoryRestrictions__other_compliance = Slot(uri=D4D.otherCompliance, name="exportControlRegulatoryRestrictions__other_compliance", curie=D4D.curie('otherCompliance'), model_uri=DATA_SHEETS_SCHEMA.exportControlRegulatoryRestrictions__other_compliance, domain=None, range=Optional[Union[str, list[str]]]) -slots.exportControlRegulatoryRestrictions__confidentiality_level = Slot(uri=D4DDATAGOVERNANCE.confidentiality_level, name="exportControlRegulatoryRestrictions__confidentiality_level", curie=D4DDATAGOVERNANCE.curie('confidentiality_level'), +slots.exportControlRegulatoryRestrictions__confidentiality_level = Slot(uri=D4D.confidentialityLevel, name="exportControlRegulatoryRestrictions__confidentiality_level", curie=D4D.curie('confidentialityLevel'), model_uri=DATA_SHEETS_SCHEMA.exportControlRegulatoryRestrictions__confidentiality_level, domain=None, range=Optional[Union[str, "ConfidentialityLevelEnum"]]) slots.exportControlRegulatoryRestrictions__governance_committee_contact = Slot(uri=SCHEMA.contactPoint, 
name="exportControlRegulatoryRestrictions__governance_committee_contact", curie=SCHEMA.curie('contactPoint'), @@ -4207,7 +4530,7 @@ class slots: slots.variableMetadata__unit = Slot(uri=QUDT.unit, name="variableMetadata__unit", curie=QUDT.curie('unit'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__unit, domain=None, range=Optional[Union[str, URIorCURIE]]) -slots.variableMetadata__missing_value_code = Slot(uri=D4DVARIABLES.missing_value_code, name="variableMetadata__missing_value_code", curie=D4DVARIABLES.curie('missing_value_code'), +slots.variableMetadata__missing_value_code = Slot(uri=D4D.missingValueCode, name="variableMetadata__missing_value_code", curie=D4D.curie('missingValueCode'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__missing_value_code, domain=None, range=Optional[Union[str, list[str]]]) slots.variableMetadata__minimum_value = Slot(uri=SCHEMA.minValue, name="variableMetadata__minimum_value", curie=SCHEMA.curie('minValue'), @@ -4225,29 +4548,53 @@ class slots: slots.variableMetadata__is_identifier = Slot(uri=SCHEMA.identifier, name="variableMetadata__is_identifier", curie=SCHEMA.curie('identifier'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__is_identifier, domain=None, range=Optional[Union[bool, Bool]]) -slots.variableMetadata__is_sensitive = Slot(uri=D4DVARIABLES.is_sensitive, name="variableMetadata__is_sensitive", curie=D4DVARIABLES.curie('is_sensitive'), +slots.variableMetadata__is_sensitive = Slot(uri=D4D.isSensitive, name="variableMetadata__is_sensitive", curie=D4D.curie('isSensitive'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__is_sensitive, domain=None, range=Optional[Union[bool, Bool]]) -slots.variableMetadata__precision = Slot(uri=D4DVARIABLES.precision, name="variableMetadata__precision", curie=D4DVARIABLES.curie('precision'), +slots.variableMetadata__precision = Slot(uri=SCHEMA.valuePrecision, name="variableMetadata__precision", curie=SCHEMA.curie('valuePrecision'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__precision, 
domain=None, range=Optional[int]) slots.variableMetadata__measurement_technique = Slot(uri=SCHEMA.measurementTechnique, name="variableMetadata__measurement_technique", curie=SCHEMA.curie('measurementTechnique'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__measurement_technique, domain=None, range=Optional[str]) -slots.variableMetadata__derivation = Slot(uri=D4DVARIABLES.derivation, name="variableMetadata__derivation", curie=D4DVARIABLES.curie('derivation'), +slots.variableMetadata__derivation = Slot(uri=DCTERMS.provenance, name="variableMetadata__derivation", curie=DCTERMS.curie('provenance'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__derivation, domain=None, range=Optional[str]) -slots.variableMetadata__quality_notes = Slot(uri=D4DVARIABLES.quality_notes, name="variableMetadata__quality_notes", curie=D4DVARIABLES.curie('quality_notes'), +slots.variableMetadata__quality_notes = Slot(uri=DCTERMS.description, name="variableMetadata__quality_notes", curie=DCTERMS.curie('description'), model_uri=DATA_SHEETS_SCHEMA.variableMetadata__quality_notes, domain=None, range=Optional[Union[str, list[str]]]) -slots.DatasetCollection_resources = Slot(uri=DATA_SHEETS_SCHEMA.resources, name="DatasetCollection_resources", curie=DATA_SHEETS_SCHEMA.curie('resources'), +slots.file__file_type = Slot(uri=D4D.fileType, name="file__file_type", curie=D4D.curie('fileType'), + model_uri=DATA_SHEETS_SCHEMA.file__file_type, domain=None, range=Optional[Union[str, "FileTypeEnum"]]) + +slots.fileCollection__collection_type = Slot(uri=D4D.collectionType, name="fileCollection__collection_type", curie=D4D.curie('collectionType'), + model_uri=DATA_SHEETS_SCHEMA.fileCollection__collection_type, domain=None, range=Optional[Union[Union[str, "FileCollectionTypeEnum"], list[Union[str, "FileCollectionTypeEnum"]]]]) + +slots.fileCollection__file_count = Slot(uri=D4D.fileCount, name="fileCollection__file_count", curie=D4D.curie('fileCount'), + model_uri=DATA_SHEETS_SCHEMA.fileCollection__file_count, 
domain=None, range=Optional[int]) + +slots.fileCollection__total_bytes = Slot(uri=DCAT.byteSize, name="fileCollection__total_bytes", curie=DCAT.curie('byteSize'), + model_uri=DATA_SHEETS_SCHEMA.fileCollection__total_bytes, domain=None, range=Optional[int]) + +slots.DatasetCollection_resources = Slot(uri=SCHEMA.hasPart, name="DatasetCollection_resources", curie=SCHEMA.curie('hasPart'), model_uri=DATA_SHEETS_SCHEMA.DatasetCollection_resources, domain=DatasetCollection, range=Optional[Union[dict[Union[str, DatasetId], Union[dict, "Dataset"]], list[Union[dict, "Dataset"]]]]) slots.Dataset_external_resources = Slot(uri=DCTERMS.references, name="Dataset_external_resources", curie=DCTERMS.curie('references'), model_uri=DATA_SHEETS_SCHEMA.Dataset_external_resources, domain=Dataset, range=Optional[Union[Union[dict, "ExternalResource"], list[Union[dict, "ExternalResource"]]]]) -slots.Dataset_resources = Slot(uri=DATA_SHEETS_SCHEMA.resources, name="Dataset_resources", curie=DATA_SHEETS_SCHEMA.curie('resources'), +slots.Dataset_resources = Slot(uri=SCHEMA.hasPart, name="Dataset_resources", curie=SCHEMA.curie('hasPart'), model_uri=DATA_SHEETS_SCHEMA.Dataset_resources, domain=Dataset, range=Optional[Union[dict[Union[str, DatasetId], Union[dict, "Dataset"]], list[Union[dict, "Dataset"]]]]) slots.ExternalResource_external_resources = Slot(uri=DCTERMS.references, name="ExternalResource_external_resources", curie=DCTERMS.curie('references'), model_uri=DATA_SHEETS_SCHEMA.ExternalResource_external_resources, domain=ExternalResource, range=Optional[Union[str, list[str]]]) + +slots.FileCollection_path = Slot(uri=SCHEMA.contentUrl, name="FileCollection_path", curie=SCHEMA.curie('contentUrl'), + model_uri=DATA_SHEETS_SCHEMA.FileCollection_path, domain=FileCollection, range=Optional[str]) + +slots.FileCollection_compression = Slot(uri=DCAT.compressFormat, name="FileCollection_compression", curie=DCAT.curie('compressFormat'), + model_uri=DATA_SHEETS_SCHEMA.FileCollection_compression, 
domain=FileCollection, range=Optional[Union[str, "CompressionEnum"]]) + +slots.FileCollection_external_resources = Slot(uri=DCTERMS.references, name="FileCollection_external_resources", curie=DCTERMS.curie('references'), + model_uri=DATA_SHEETS_SCHEMA.FileCollection_external_resources, domain=FileCollection, range=Optional[Union[Union[dict, ExternalResource], list[Union[dict, ExternalResource]]]]) + +slots.FileCollection_resources = Slot(uri=SCHEMA.hasPart, name="FileCollection_resources", curie=SCHEMA.curie('hasPart'), + model_uri=DATA_SHEETS_SCHEMA.FileCollection_resources, domain=FileCollection, range=Optional[Union[dict[Union[str, DatasetId], Union[dict, Dataset]], list[Union[dict, Dataset]]]]) diff --git a/src/data_sheets_schema/schema/D4D_Base_import.yaml b/src/data_sheets_schema/schema/D4D_Base_import.yaml index ba560088..a846a0a9 100644 --- a/src/data_sheets_schema/schema/D4D_Base_import.yaml +++ b/src/data_sheets_schema/schema/D4D_Base_import.yaml @@ -369,8 +369,9 @@ slots: resources: description: >- - Sub-resources or component datasets. Used in DatasetCollection to contain - Dataset objects, and in Dataset to allow nested resource structures. + Sub-resources or component items. In DatasetCollection, contains Dataset objects. + In Dataset, contains nested Dataset objects. In FileCollection, contains nested + FileCollection objects. The specific range is defined via slot_usage in each class. 
range: Dataset multivalued: true slot_uri: schema:hasPart diff --git a/src/data_sheets_schema/schema/D4D_FileCollection.yaml b/src/data_sheets_schema/schema/D4D_FileCollection.yaml new file mode 100644 index 00000000..25813955 --- /dev/null +++ b/src/data_sheets_schema/schema/D4D_FileCollection.yaml @@ -0,0 +1,188 @@ +--- +id: "https://w3id.org/bridge2ai/data-sheets-schema/file-collection" +name: "data-sheets-schema-file-collection" +title: "Datasheets for Datasets – File Collection Module" +description: > + Module defining FileCollection class for representing collections of + files with shared characteristics within datasets. +license: MIT +see_also: + - "https://bridge2ai.github.io/data-sheets-schema" + +prefixes: + d4d: https://w3id.org/bridge2ai/data-sheets-schema/ + dcat: http://www.w3.org/ns/dcat# + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + +default_prefix: data_sheets_schema +default_range: string + +imports: + - linkml:types + - D4D_Base_import + +classes: + + File: + aliases: + - data file + - file + - file object + description: >- + A single file within a dataset or file collection. + Represents an individual data file, code file, documentation file, etc. + Maps to RO-Crate File entities. + is_a: Information + class_uri: schema:MediaObject + exact_mappings: + - schema:DigitalDocument + slots: + - bytes + - path + - format + - encoding + - compression + - media_type + - hash + - md5 + - sha256 + - dialect + attributes: + file_type: + description: >- + Semantic type or purpose of this file (e.g., data_file, code_file, + documentation_file, metadata_file). + range: FileTypeEnum + slot_uri: d4d:fileType + + FileCollection: + aliases: + - file collection + - data files + - file group + description: >- + A collection of files with shared characteristics (format, purpose, structure). + Represents a logical grouping of related files within a dataset, such as + all training data files, all image files, or all raw data files. 
+ Maps to RO-Crate Dataset entities via schema:hasPart relationships. + is_a: Information + class_uri: dcat:Dataset + exact_mappings: + - schema:Dataset + close_mappings: + - dcat:Distribution + slots: + - path + - compression + - external_resources + - resources + slot_usage: + path: + description: >- + Path or URL to the FileCollection. May be a directory path, archive file path, + or download URL depending on how the collection is distributed. + compression: + description: >- + Compression format if the collection is packaged as a compressed archive + (e.g., gzip, zip, bzip2). Omit this field for uncompressed collections or + purely logical groupings. + external_resources: + description: >- + External files or URLs referenced by this file collection. + range: ExternalResource + multivalued: true + inlined_as_list: true + resources: + description: >- + Individual files or nested file collections within this collection. + Allows hierarchical file organization with both File objects and + nested FileCollection objects. + any_of: + - range: File + - range: FileCollection + multivalued: true + inlined_as_list: true + attributes: + collection_type: + description: >- + Type(s) of content in this file collection. A collection may have + multiple types, for example a collection containing both raw_data + and documentation files would have both types listed. + range: FileCollectionTypeEnum + slot_uri: d4d:collectionType + multivalued: true + file_count: + description: Number of files in this collection. + range: integer + slot_uri: d4d:fileCount + total_bytes: + description: Total size of all files in bytes. + range: integer + slot_uri: dcat:byteSize + +enums: + FileTypeEnum: + description: Types of individual files within datasets. 
+ permissible_values: + data_file: + description: A data file containing dataset content + meaning: schema:DataDownload + code_file: + description: A source code or script file + meaning: schema:SoftwareSourceCode + documentation_file: + description: A documentation file (README, guide, etc.) + meaning: schema:Documentation + metadata_file: + description: A metadata or annotation file + meaning: dcat:CatalogRecord + configuration_file: + description: A configuration or settings file + meaning: d4d:ConfigurationFile + notebook_file: + description: A computational notebook file (Jupyter, R Markdown, etc.) + meaning: d4d:NotebookFile + image_file: + description: An image or visualization file + meaning: schema:ImageObject + archive_file: + description: An archive or compressed file + meaning: d4d:ArchiveFile + other: + description: Other file type + meaning: d4d:OtherFile + + FileCollectionTypeEnum: + description: Types of file collections within datasets. + permissible_values: + raw_data: + description: Raw, unprocessed data files + meaning: d4d:RawData + processed_data: + description: Cleaned, processed, or transformed data files + meaning: d4d:ProcessedData + training_split: + description: Files designated for model training + meaning: d4d:TrainingSplit + test_split: + description: Files designated for model testing + meaning: d4d:TestSplit + validation_split: + description: Files designated for model validation + meaning: d4d:ValidationSplit + documentation: + description: Documentation files (README, codebook, etc.) 
+ meaning: schema:Documentation + metadata: + description: Metadata or annotation files + meaning: dcat:CatalogRecord + code: + description: Code or script files + meaning: schema:SoftwareSourceCode + supplementary: + description: Supplementary materials + meaning: schema:SupplementalMaterial + other: + description: Other file collection type + meaning: d4d:OtherFileCollection diff --git a/src/data_sheets_schema/schema/data_sheets_schema.yaml b/src/data_sheets_schema/schema/data_sheets_schema.yaml index ef4407f9..ccd416b3 100644 --- a/src/data_sheets_schema/schema/data_sheets_schema.yaml +++ b/src/data_sheets_schema/schema/data_sheets_schema.yaml @@ -47,6 +47,7 @@ imports: - D4D_Human - D4D_Data_Governance - D4D_Variables + - D4D_FileCollection ## TYPES ## @@ -94,27 +95,50 @@ classes: read, manipulated, transformed, and otherwise interpreted. is_a: Information slots: - - bytes - - dialect - - encoding - - format - - hash - - md5 - - media_type - - path - - sha256 - external_resources - resources slot_usage: external_resources: + description: >- + External resources referenced at the dataset level (e.g., related publications, + repositories, documentation). For file-level external resources, use + FileCollection.external_resources. range: ExternalResource + multivalued: true inlined_as_list: true resources: description: >- Sub-resources or component datasets that are part of this dataset. - Allows datasets to contain nested resource structures. + Note: For file collections, use the file_collections attribute instead. + range: Dataset + multivalued: true inlined_as_list: true attributes: + # FileCollection module + file_collections: + description: >- + Collections of files within this dataset. Each collection represents + a logical grouping of files with shared characteristics (e.g., all + training data, all image files, all raw data files). Maps to nested + RO-Crate Dataset entities via schema:hasPart. 
+ slot_uri: schema:hasPart + range: FileCollection + multivalued: true + inlined_as_list: true + exact_mappings: + - dcat:distribution + total_file_count: + description: >- + Total number of files across all file collections in this dataset. + Can be aggregated from file_collections[].file_count. + range: integer + slot_uri: d4d:totalFileCount + total_size_bytes: + description: >- + Total size of all files in bytes across all file collections. + Can be aggregated from file_collections[].total_bytes. + range: integer + slot_uri: dcat:byteSize # Motivation module classes purposes: slot_uri: d4d:purposes diff --git a/src/data_sheets_schema/schema/data_sheets_schema_all.yaml b/src/data_sheets_schema/schema/data_sheets_schema_all.yaml index 697c6d1f..c7edfb8a 100644 --- a/src/data_sheets_schema/schema/data_sheets_schema_all.yaml +++ b/src/data_sheets_schema/schema/data_sheets_schema_all.yaml @@ -1128,6 +1128,92 @@ enums: description: Complex structured objects or nested data structures. broad_mappings: - schema:StructuredValue + FileTypeEnum: + name: FileTypeEnum + description: Types of individual files within datasets. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + permissible_values: + data_file: + text: data_file + description: A data file containing dataset content + meaning: schema:DataDownload + code_file: + text: code_file + description: A source code or script file + meaning: schema:SoftwareSourceCode + documentation_file: + text: documentation_file + description: A documentation file (README, guide, etc.) + meaning: schema:Documentation + metadata_file: + text: metadata_file + description: A metadata or annotation file + meaning: dcat:CatalogRecord + configuration_file: + text: configuration_file + description: A configuration or settings file + meaning: d4d:ConfigurationFile + notebook_file: + text: notebook_file + description: A computational notebook file (Jupyter, R Markdown, etc.) 
+ meaning: d4d:NotebookFile + image_file: + text: image_file + description: An image or visualization file + meaning: schema:ImageObject + archive_file: + text: archive_file + description: An archive or compressed file + meaning: d4d:ArchiveFile + other: + text: other + description: Other file type + meaning: d4d:OtherFile + FileCollectionTypeEnum: + name: FileCollectionTypeEnum + description: Types of file collections within datasets. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + permissible_values: + raw_data: + text: raw_data + description: Raw, unprocessed data files + meaning: d4d:RawData + processed_data: + text: processed_data + description: Cleaned, processed, or transformed data files + meaning: d4d:ProcessedData + training_split: + text: training_split + description: Files designated for model training + meaning: d4d:TrainingSplit + test_split: + text: test_split + description: Files designated for model testing + meaning: d4d:TestSplit + validation_split: + text: validation_split + description: Files designated for model validation + meaning: d4d:ValidationSplit + documentation: + text: documentation + description: Documentation files (README, codebook, etc.) 
+ meaning: schema:Documentation + metadata: + text: metadata + description: Metadata or annotation files + meaning: dcat:CatalogRecord + code: + text: code + description: Code or script files + meaning: schema:SoftwareSourceCode + supplementary: + text: supplementary + description: Supplementary materials + meaning: schema:SupplementalMaterial + other: + text: other + description: Other file collection type + meaning: d4d:OtherFileCollection slots: same_as: name: same_as @@ -1160,6 +1246,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File language: name: language description: language in which the information is expressed @@ -1172,6 +1259,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File publisher: name: publisher from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1181,6 +1269,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File range: uriorcurie issued: name: issued @@ -1191,6 +1280,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File range: datetime page: name: page @@ -1201,6 +1291,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File dialect: name: dialect description: Specific format dialect or variation (e.g., CSV dialect, JSON-LD @@ -1208,21 +1299,22 @@ slots: from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: schema:encodingFormat domain_of: - - Dataset + - File bytes: name: bytes description: Size of the data in bytes. from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcat:byteSize domain_of: - - Dataset + - File range: integer path: name: path from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: schema:contentUrl domain_of: - - Dataset + - File + - FileCollection download_url: name: download_url description: URL from which the data can be downloaded. 
This is not the same as @@ -1237,6 +1329,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File range: uri format: name: format @@ -1245,7 +1338,7 @@ slots: from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcterms:format domain_of: - - Dataset + - File range: FormatEnum encoding: name: encoding @@ -1253,7 +1346,7 @@ slots: from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcat:mediaType domain_of: - - Dataset + - File range: EncodingEnum compression: name: compression @@ -1262,6 +1355,8 @@ slots: slot_uri: dcat:compressFormat domain_of: - Information + - File + - FileCollection - DatasetCollection - Dataset - DataSubset @@ -1274,7 +1369,7 @@ slots: - schema:encodingFormat slot_uri: dcat:mediaType domain_of: - - Dataset + - File range: MediaTypeEnum hash: name: hash @@ -1282,21 +1377,21 @@ slots: from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcterms:identifier domain_of: - - Dataset + - File md5: name: md5 description: md5 hash of the data from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcterms:identifier domain_of: - - Dataset + - File sha256: name: sha256 description: sha256 hash of the data from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcterms:identifier domain_of: - - Dataset + - File conforms_to: name: conforms_to from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1306,6 +1401,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File conforms_to_schema: name: conforms_to_schema from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1315,6 +1411,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File conforms_to_class: name: conforms_to_class from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1324,6 +1421,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File license: name: license from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1334,6 +1432,7 @@ slots: - DatasetCollection - 
Dataset - DataSubset + - File keywords: name: keywords from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1343,6 +1442,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File multivalued: true version: name: version @@ -1354,6 +1454,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File created_by: name: created_by from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1363,6 +1464,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File created_on: name: created_on from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1372,6 +1474,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File range: datetime last_updated_on: name: last_updated_on @@ -1382,6 +1485,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File range: datetime modified_by: name: modified_by @@ -1392,6 +1496,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File status: name: status from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1401,6 +1506,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File was_derived_from: name: was_derived_from from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -1412,6 +1518,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File doi: name: doi description: digital object identifier @@ -1422,6 +1529,7 @@ slots: - DatasetCollection - Dataset - DataSubset + - File pattern: 10\.\d{4,}\/.+ external_resources: name: external_resources @@ -1433,17 +1541,22 @@ slots: domain_of: - Dataset - ExternalResource + - FileCollection - DataSubset multivalued: true resources: name: resources - description: Sub-resources or component datasets. Used in DatasetCollection to - contain Dataset objects, and in Dataset to allow nested resource structures. + description: Sub-resources or component items. In DatasetCollection, contains + Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, + contains nested FileCollection objects. 
The specific range is defined via slot_usage + in each class. from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: schema:hasPart domain_of: - DatasetCollection - Dataset + - FileCollection + - DataSubset range: Dataset multivalued: true classes: @@ -1470,8 +1583,10 @@ classes: attributes: resources: name: resources - description: Sub-resources or component datasets. Used in DatasetCollection - to contain Dataset objects, and in Dataset to allow nested resource structures. + description: Sub-resources or component items. In DatasetCollection, contains + Dataset objects. In Dataset, contains nested Dataset objects. In FileCollection, + contains nested FileCollection objects. The specific range is defined via + slot_usage in each class. from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: schema:hasPart alias: resources @@ -1479,6 +1594,8 @@ classes: domain_of: - DatasetCollection - Dataset + - FileCollection + - DataSubset range: Dataset multivalued: true inlined_as_list: true @@ -1491,6 +1608,8 @@ classes: owner: DatasetCollection domain_of: - Information + - File + - FileCollection - DatasetCollection - Dataset - DataSubset @@ -1506,6 +1625,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_class: name: conforms_to_class @@ -1518,6 +1638,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_schema: name: conforms_to_schema @@ -1530,6 +1651,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_by: name: created_by @@ -1542,6 +1664,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_on: name: created_on @@ -1554,6 +1677,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime doi: name: doi @@ -1567,6 +1691,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string pattern: 10\.\d{4,}\/.+ download_url: @@ -1585,6 +1710,7 @@ classes: - 
DatasetCollection - Dataset - DataSubset + - File range: uri issued: name: issued @@ -1597,6 +1723,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime keywords: name: keywords @@ -1609,6 +1736,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string multivalued: true language: @@ -1625,6 +1753,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string last_updated_on: name: last_updated_on @@ -1637,6 +1766,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime license: name: license @@ -1650,6 +1780,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string modified_by: name: modified_by @@ -1662,6 +1793,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string page: name: page @@ -1674,6 +1806,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string publisher: name: publisher @@ -1686,6 +1819,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uriorcurie status: name: status @@ -1698,6 +1832,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string title: name: title @@ -1711,6 +1846,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string version: name: version @@ -1724,6 +1860,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string was_derived_from: name: was_derived_from @@ -1738,6 +1875,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string id: name: id @@ -1763,6 +1901,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - 
CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie name: name: name @@ -1787,6 +1986,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - 
HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string description: name: description @@ -1812,6 +2072,66 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string tree_root: true Dataset: @@ -1827,28 +2147,65 @@ classes: - schema:DataDownload is_a: Information slots: - - bytes - - dialect - - encoding - - format - - hash - - md5 - - media_type - - path - - sha256 - external_resources - resources slot_usage: external_resources: name: external_resources + description: External resources referenced at the dataset level (e.g., related + publications, repositories, documentation). 
For file-level external resources, + use FileCollection.external_resources. range: ExternalResource + multivalued: true inlined_as_list: true resources: name: resources - description: Sub-resources or component datasets that are part of this dataset. - Allows datasets to contain nested resource structures. + description: 'Sub-resources or component datasets that are part of this dataset. + Note: For file collections, use the file_collections attribute instead.' + range: Dataset + multivalued: true inlined_as_list: true attributes: + file_collections: + name: file_collections + description: Collections of files within this dataset. Each collection represents + a logical grouping of files with shared characteristics (e.g., all training + data, all image files, all raw data files). Maps to nested RO-Crate Dataset + entities via schema:hasPart. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - dcat:distribution + slot_uri: schema:hasPart + alias: file_collections + owner: Dataset + domain_of: + - Dataset + range: FileCollection + multivalued: true + inlined: true + inlined_as_list: true + total_file_count: + name: total_file_count + description: Total number of files across all file collections in this dataset. + Can be aggregated from file_collections[].file_count. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: d4d:totalFileCount + alias: total_file_count + owner: Dataset + domain_of: + - Dataset + range: integer + total_size_bytes: + name: total_size_bytes + description: Total size of all files in bytes across all file collections. + Can be aggregated from file_collections[].total_bytes. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:byteSize + alias: total_size_bytes + owner: Dataset + domain_of: + - Dataset + range: integer purposes: name: purposes from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -2574,127 +2931,37 @@ classes: multivalued: true inlined: true inlined_as_list: true - bytes: - name: bytes - description: Size of the data in bytes. + external_resources: + name: external_resources + description: External resources referenced at the dataset level (e.g., related + publications, repositories, documentation). For file-level external resources, + use FileCollection.external_resources. from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:byteSize - alias: bytes + slot_uri: dcterms:references + alias: external_resources owner: Dataset domain_of: - Dataset - range: integer - dialect: - name: dialect - description: Specific format dialect or variation (e.g., CSV dialect, JSON-LD - profile). + - ExternalResource + - FileCollection + - DataSubset + range: ExternalResource + multivalued: true + inlined: true + inlined_as_list: true + resources: + name: resources + description: 'Sub-resources or component datasets that are part of this dataset. + Note: For file collections, use the file_collections attribute instead.' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: schema:encodingFormat - alias: dialect - owner: Dataset - domain_of: - - Dataset - range: string - encoding: - name: encoding - description: the character encoding of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:mediaType - alias: encoding - owner: Dataset - domain_of: - - Dataset - range: EncodingEnum - format: - name: format - description: The file format, physical medium, or dimensions of a resource. - This should be a file extension or MIME type. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:format - alias: format - owner: Dataset - domain_of: - - Dataset - range: FormatEnum - hash: - name: hash - description: hash of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: hash - owner: Dataset - domain_of: - - Dataset - range: string - md5: - name: md5 - description: md5 hash of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: md5 - owner: Dataset - domain_of: - - Dataset - range: string - media_type: - name: media_type - description: The media type of the data. This should be a MIME type. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - schema:encodingFormat - slot_uri: dcat:mediaType - alias: media_type - owner: Dataset - domain_of: - - Dataset - range: MediaTypeEnum - path: - name: path - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: schema:contentUrl - alias: path - owner: Dataset - domain_of: - - Dataset - range: string - sha256: - name: sha256 - description: sha256 hash of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: sha256 - owner: Dataset - domain_of: - - Dataset - range: string - external_resources: - name: external_resources - description: Links or identifiers for external resources. Can be used either - as a list of ExternalResource objects (in Dataset) or as a list of URL strings - (within ExternalResource class). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:references - alias: external_resources - owner: Dataset - domain_of: - - Dataset - - ExternalResource - - DataSubset - range: ExternalResource - multivalued: true - inlined: true - inlined_as_list: true - resources: - name: resources - description: Sub-resources or component datasets that are part of this dataset. 
- Allows datasets to contain nested resource structures. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: schema:hasPart - alias: resources + slot_uri: schema:hasPart + alias: resources owner: Dataset domain_of: - DatasetCollection - Dataset + - FileCollection + - DataSubset range: Dataset multivalued: true inlined: true @@ -2708,6 +2975,8 @@ classes: owner: Dataset domain_of: - Information + - File + - FileCollection - DatasetCollection - Dataset - DataSubset @@ -2723,6 +2992,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_class: name: conforms_to_class @@ -2735,6 +3005,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_schema: name: conforms_to_schema @@ -2747,6 +3018,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_by: name: created_by @@ -2759,6 +3031,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_on: name: created_on @@ -2771,6 +3044,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime doi: name: doi @@ -2784,6 +3058,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string pattern: 10\.\d{4,}\/.+ download_url: @@ -2802,6 +3077,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uri issued: name: issued @@ -2814,6 +3090,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime keywords: name: keywords @@ -2826,6 +3103,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string multivalued: true language: @@ -2842,6 +3120,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string last_updated_on: name: last_updated_on @@ -2854,6 +3133,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime license: name: license @@ -2867,6 +3147,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string 
modified_by: name: modified_by @@ -2879,6 +3160,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string page: name: page @@ -2891,6 +3173,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string publisher: name: publisher @@ -2903,6 +3186,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uriorcurie status: name: status @@ -2915,6 +3199,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string title: name: title @@ -2928,6 +3213,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string version: name: version @@ -2941,6 +3227,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string was_derived_from: name: was_derived_from @@ -2955,6 +3242,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string id: name: id @@ -2980,6 +3268,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - 
ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie required: true name: @@ -3005,6 +3354,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string description: name: description @@ -3030,133 +3440,100 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + 
- DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - class_uri: dcat:Distribution - DataSubset: - name: DataSubset - description: A subset of a dataset, likely containing multiple files of multiple - potential purposes and properties. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: Dataset - attributes: - is_data_split: - name: is_data_split - description: Is this subset a split of the larger dataset, e.g., is it a set - for model training, testing, or validation? - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - alias: is_data_split - owner: DataSubset - domain_of: - - DataSubset - range: boolean - is_subpopulation: - name: is_subpopulation - description: Is this subset a subpopulation of the larger dataset, e.g., is - it a set of data for a specific demographic? - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - alias: is_subpopulation - owner: DataSubset - domain_of: - - DataSubset - range: boolean - bytes: - name: bytes - description: Size of the data in bytes. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:byteSize - alias: bytes - owner: DataSubset - domain_of: - - Dataset - range: integer - dialect: - name: dialect - description: Specific format dialect or variation (e.g., CSV dialect, JSON-LD - profile). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: schema:encodingFormat - alias: dialect - owner: DataSubset - domain_of: - - Dataset - range: string - encoding: - name: encoding - description: the character encoding of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:mediaType - alias: encoding - owner: DataSubset - domain_of: - - Dataset - range: EncodingEnum - format: - name: format - description: The file format, physical medium, or dimensions of a resource. - This should be a file extension or MIME type. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:format - alias: format - owner: DataSubset - domain_of: - - Dataset - range: FormatEnum - hash: - name: hash - description: hash of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: hash - owner: DataSubset - domain_of: - - Dataset - range: string - md5: - name: md5 - description: md5 hash of the data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: md5 - owner: DataSubset - domain_of: - - Dataset - range: string - media_type: - name: media_type - description: The media type of the data. This should be a MIME type. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - schema:encodingFormat - slot_uri: dcat:mediaType - alias: media_type - owner: DataSubset - domain_of: - - Dataset - range: MediaTypeEnum - path: - name: path + class_uri: dcat:Distribution + DataSubset: + name: DataSubset + description: A subset of a dataset, likely containing multiple files of multiple + potential purposes and properties. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: Dataset + attributes: + is_data_split: + name: is_data_split + description: Is this subset a split of the larger dataset, e.g., is it a set + for model training, testing, or validation? from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: schema:contentUrl - alias: path + alias: is_data_split owner: DataSubset domain_of: - - Dataset - range: string - sha256: - name: sha256 - description: sha256 hash of the data + - DataSubset + range: boolean + is_subpopulation: + name: is_subpopulation + description: Is this subset a subpopulation of the larger dataset, e.g., is + it a set of data for a specific demographic? from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:identifier - alias: sha256 + alias: is_subpopulation owner: DataSubset domain_of: - - Dataset - range: string + - DataSubset + range: boolean external_resources: name: external_resources - description: Links or identifiers for external resources. Can be used either - as a list of ExternalResource objects (in Dataset) or as a list of URL strings - (within ExternalResource class). + description: External resources referenced at the dataset level (e.g., related + publications, repositories, documentation). For file-level external resources, + use FileCollection.external_resources. from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: dcterms:references alias: external_resources @@ -3164,14 +3541,15 @@ classes: domain_of: - Dataset - ExternalResource + - FileCollection - DataSubset range: ExternalResource multivalued: true inlined_as_list: true resources: name: resources - description: Sub-resources or component datasets that are part of this dataset. - Allows datasets to contain nested resource structures. + description: 'Sub-resources or component datasets that are part of this dataset. + Note: For file collections, use the file_collections attribute instead.' 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema slot_uri: schema:hasPart alias: resources @@ -3179,9 +3557,50 @@ classes: domain_of: - DatasetCollection - Dataset + - FileCollection + - DataSubset range: Dataset multivalued: true inlined_as_list: true + file_collections: + name: file_collections + description: Collections of files within this dataset. Each collection represents + a logical grouping of files with shared characteristics (e.g., all training + data, all image files, all raw data files). Maps to nested RO-Crate Dataset + entities via schema:hasPart. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - dcat:distribution + slot_uri: schema:hasPart + alias: file_collections + owner: DataSubset + domain_of: + - Dataset + range: FileCollection + multivalued: true + inlined_as_list: true + total_file_count: + name: total_file_count + description: Total number of files across all file collections in this dataset. + Can be aggregated from file_collections[].file_count. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: d4d:totalFileCount + alias: total_file_count + owner: DataSubset + domain_of: + - Dataset + range: integer + total_size_bytes: + name: total_size_bytes + description: Total size of all files in bytes across all file collections. + Can be aggregated from file_collections[].total_bytes. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:byteSize + alias: total_size_bytes + owner: DataSubset + domain_of: + - Dataset + range: integer purposes: name: purposes from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -3869,6 +4288,8 @@ classes: owner: DataSubset domain_of: - Information + - File + - FileCollection - DatasetCollection - Dataset - DataSubset @@ -3884,6 +4305,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_class: name: conforms_to_class @@ -3896,6 +4318,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_schema: name: conforms_to_schema @@ -3908,6 +4331,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_by: name: created_by @@ -3920,6 +4344,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_on: name: created_on @@ -3932,6 +4357,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime doi: name: doi @@ -3945,6 +4371,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string pattern: 10\.\d{4,}\/.+ download_url: @@ -3963,6 +4390,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uri issued: name: issued @@ -3975,6 +4403,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime keywords: name: keywords @@ -3987,6 +4416,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string multivalued: true language: @@ -4003,6 +4433,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string last_updated_on: name: last_updated_on @@ -4015,6 +4446,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime license: name: license @@ -4028,6 +4460,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string modified_by: name: modified_by @@ -4040,6 +4473,7 @@ classes: - DatasetCollection - Dataset - 
DataSubset + - File range: string page: name: page @@ -4052,6 +4486,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string publisher: name: publisher @@ -4064,6 +4499,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uriorcurie status: name: status @@ -4076,6 +4512,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string title: name: title @@ -4089,6 +4526,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string version: name: version @@ -4102,6 +4540,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string was_derived_from: name: was_derived_from @@ -4116,6 +4555,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string id: name: id @@ -4141,6 +4581,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - 
IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie required: true name: @@ -4166,223 +4667,6 @@ classes: - Creator - FundingMechanism - Grantor - range: string - description: - name: description - description: A human-readable description for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: DataSubset - domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - NamedThing: - name: NamedThing - description: A generic grouping for any identifiable entity. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - attributes: - id: - name: id - description: A unique identifier for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - identifier: true - alias: id - owner: NamedThing - domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: uriorcurie - required: true - name: - name: name - description: A human-readable name for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: NamedThing - domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - description: - name: description - description: A human-readable description for a thing. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: NamedThing - domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - class_uri: schema:Thing - Organization: - name: Organization - description: Represents a group or organization. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: NamedThing - attributes: - id: - name: id - description: A unique identifier for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - identifier: true - alias: id - owner: Organization - domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: uriorcurie - required: true - name: - name: name - description: A human-readable name for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: Organization - domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - description: - name: description - description: A human-readable description for a thing. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: Organization - domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - class_uri: schema:Organization - DatasetProperty: - name: DatasetProperty - description: Represents a single property of a dataset, or a set of related properties. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - attributes: - id: - name: id - description: An optional identifier for this property. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - alias: id - owner: DatasetProperty - domain_of: - - DatasetCollection - - Dataset - - DataSubset - - NamedThing - - Organization - - DatasetProperty - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - Grant - Instance - SamplingStrategy @@ -4442,21 +4726,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: uriorcurie - name: - name: name - description: A human-readable name for this property. + - VariableMetadata + - File + range: string + description: + name: description + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: DatasetProperty + slot_uri: schema:description + alias: description + owner: DataSubset domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -4481,7 +4768,6 @@ classes: - Subpopulation - Deidentification - SensitiveElement - - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -4525,22 +4811,29 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - description: - name: description - description: A human-readable description for this property. + NamedThing: + name: NamedThing + description: A generic grouping for any identifiable entity. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + attributes: + id: + name: id + description: A unique identifier for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: DatasetProperty + slot_uri: schema:identifier + identifier: true + alias: id + owner: NamedThing domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - - DatasetRelationship - Software - Person - Information @@ -4565,6 +4858,7 @@ classes: - Subpopulation - Deidentification - SensitiveElement + - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -4608,21 +4902,34 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: string - used_software: - name: used_software - description: What software was used as part of this dataset property? 
+ - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: d4d:usedSoftware - alias: used_software - owner: DatasetProperty + slot_uri: schema:name + alias: name + owner: NamedThing domain_of: + - NamedThing - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information - Purpose - Task - AddressingGap - Creator - FundingMechanism + - Grantor + - Grant - Instance - SamplingStrategy - MissingInfo @@ -4681,53 +4988,101 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: Software - multivalued: true - inlined: true - inlined_as_list: true - Software: - name: Software - description: A software program or library. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - schema:SoftwareApplication - is_a: NamedThing - attributes: - version: - name: version - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:softwareVersion - alias: version - owner: Software - domain_of: - - DatasetCollection - - Dataset - - DataSubset - - Software - - Information + - VariableMetadata + - File range: string - license: - name: license + description: + name: description + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:license - alias: license - owner: Software + slot_uri: schema:description + alias: description + owner: NamedThing domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset + - Organization - Software + - Person - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - url: - name: url - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:url - alias: url - owner: Software - domain_of: - - Software - range: string + class_uri: schema:Thing + Organization: + name: Organization + description: Represents a group or organization. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: NamedThing + attributes: id: name: id description: A unique identifier for a thing. @@ -4735,7 +5090,7 @@ classes: slot_uri: schema:identifier identifier: true alias: id - owner: Software + owner: Organization domain_of: - NamedThing - DatasetProperty @@ -4752,6 +5107,67 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie required: true name: @@ -4760,7 +5176,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Software + owner: Organization domain_of: - NamedThing - DatasetProperty @@ -4777,94 +5193,79 @@ classes: - Creator - FundingMechanism - Grantor - range: string - description: - name: description - 
description: A human-readable description for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: Software - domain_of: - - NamedThing - - DatasetProperty + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement - DatasetRelationship - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: string - class_uri: schema:SoftwareApplication - Person: - name: Person - description: An individual human being. This class represents a person in the - context of a specific dataset. Attributes like affiliation and email represent - the person's current or most relevant contact information for this dataset. - For stable cross-dataset identification, use the ORCID field. Note that contributor - roles (CRediT) are specified in the usage context (e.g., Creator class) rather - than on the Person directly, since roles vary by dataset. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: NamedThing - attributes: - affiliation: - name: affiliation - description: The organization(s) to which the person belongs in the context - of this dataset. May vary across datasets; multivalued to support multiple - affiliations. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:affiliation - alias: affiliation - owner: Person - domain_of: - - Person - range: Organization - multivalued: true - email: - name: email - description: The email address of the person. Represents current/preferred - contact information in the context of this dataset. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:email - alias: email - owner: Person - domain_of: - - Person - range: string - orcid: - name: orcid - description: 'ORCID (Open Researcher and Contributor ID) - a persistent digital - identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups - of 4). Use this for stable cross-dataset identification.' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - exact_mappings: - - schema:identifier - slot_uri: schema:identifier - alias: orcid - owner: Person - domain_of: - - Person + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - pattern: ^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$ - id: - name: id - description: A unique identifier for a thing. + description: + name: description + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - identifier: true - alias: id - owner: Person + slot_uri: schema:description + alias: description + owner: Organization domain_of: - NamedThing - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset @@ -4878,22 +5279,87 @@ classes: - Creator - FundingMechanism - Grantor - range: uriorcurie - required: true - name: - name: name - description: A human-readable name for a thing. + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + class_uri: schema:Organization + DatasetProperty: + name: DatasetProperty + description: Represents a single property of a dataset, or a set of related properties. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + attributes: + id: + name: id + description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: Person + slot_uri: schema:identifier + alias: id + owner: DatasetProperty domain_of: - - NamedThing - - DatasetProperty - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -4903,22 +5369,80 @@ classes: - Creator - FundingMechanism - Grantor - range: string - description: - name: description - description: A human-readable description for a thing. + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: uriorcurie + name: + name: name + description: A human-readable name for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: Person + slot_uri: schema:name + alias: name + owner: DatasetProperty domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -4928,9 +5452,835 @@ classes: - Creator - FundingMechanism - Grantor - range: string - class_uri: schema:Person - Information: + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: string + description: + name: description + description: A human-readable description for this property. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:description + alias: description + owner: DatasetProperty + domain_of: + - DatasetCollection + - Dataset + - DataSubset + - NamedThing + - Organization + - DatasetProperty + - DatasetRelationship + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: string + used_software: + name: used_software + description: What software was used as part of this dataset property? 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: d4d:usedSoftware + alias: used_software + owner: DatasetProperty + domain_of: + - DatasetProperty + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: Software + multivalued: true + inlined: true + inlined_as_list: true + Software: + name: Software + description: A software program or library. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:SoftwareApplication + is_a: NamedThing + attributes: + version: + name: version + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:softwareVersion + alias: version + owner: Software + domain_of: + - DatasetCollection + - Dataset + - DataSubset + - Software + - Information + range: string + license: + name: license + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:license + alias: license + owner: Software + domain_of: + - DatasetCollection + - Dataset + - DataSubset + - Software + - Information + range: string + url: + name: url + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:url + alias: url + owner: Software + domain_of: + - Software + range: string + id: + name: id + description: A unique identifier for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + identifier: true + alias: id + owner: Software + domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - 
IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:name + alias: name + owner: Software + domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - 
ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + description: + name: description + description: A human-readable description for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:description + alias: description + owner: Software + domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + class_uri: 
schema:SoftwareApplication + Person: + name: Person + description: An individual human being. This class represents a person in the + context of a specific dataset. Attributes like affiliation and email represent + the person's current or most relevant contact information for this dataset. + For stable cross-dataset identification, use the ORCID field. Note that contributor + roles (CRediT) are specified in the usage context (e.g., Creator class) rather + than on the Person directly, since roles vary by dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: NamedThing + attributes: + affiliation: + name: affiliation + description: The organization(s) to which the person belongs in the context + of this dataset. May vary across datasets; multivalued to support multiple + affiliations. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:affiliation + alias: affiliation + owner: Person + domain_of: + - Person + range: Organization + multivalued: true + email: + name: email + description: The email address of the person. Represents current/preferred + contact information in the context of this dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:email + alias: email + owner: Person + domain_of: + - Person + range: string + orcid: + name: orcid + description: 'ORCID (Open Researcher and Contributor ID) - a persistent digital + identifier for researchers. Format: 0000-0000-0000-0000 (16 digits in groups + of 4). Use this for stable cross-dataset identification.' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + exact_mappings: + - schema:identifier + slot_uri: schema:identifier + alias: orcid + owner: Person + domain_of: + - Person + range: string + pattern: ^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$ + id: + name: id + description: A unique identifier for a thing. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + identifier: true + alias: id + owner: Person + domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:name + alias: name + owner: Person + domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + description: + name: description + description: A human-readable description for a thing. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:description + alias: description + owner: Person + domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + class_uri: schema:Person + Information: name: Information description: Grouping for datasets and data files from_schema: https://w3id.org/bridge2ai/data-sheets-schema @@ -4968,6 +6318,8 @@ classes: owner: Information domain_of: - Information + - File + - FileCollection - DatasetCollection - Dataset - DataSubset @@ -4983,6 +6335,7 @@ classes: - DatasetCollection - Dataset - 
DataSubset + - File range: string conforms_to_class: name: conforms_to_class @@ -4995,6 +6348,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string conforms_to_schema: name: conforms_to_schema @@ -5007,6 +6361,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_by: name: created_by @@ -5019,6 +6374,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string created_on: name: created_on @@ -5031,6 +6387,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: datetime doi: name: doi @@ -5044,6 +6401,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: string pattern: 10\.\d{4,}\/.+ download_url: @@ -5062,6 +6420,7 @@ classes: - DatasetCollection - Dataset - DataSubset + - File range: uri issued: name: issued @@ -5070,167 +6429,842 @@ classes: alias: issued owner: Information domain_of: - - Information + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + keywords: + name: keywords + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:keyword + alias: keywords + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + multivalued: true + language: + name: language + description: language in which the information is expressed + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:inLanguage + slot_uri: dcterms:language + alias: language + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + last_updated_on: + name: last_updated_on + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:modified + alias: last_updated_on + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + license: + name: license + from_schema: 
https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:license + alias: license + owner: Information + domain_of: + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + modified_by: + name: modified_by + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:contributor + alias: modified_by + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + page: + name: page + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:landingPage + alias: page + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + publisher: + name: publisher + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:publisher + alias: publisher + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: uriorcurie + status: + name: status + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:type + alias: status + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + title: + name: title + description: the official title of the element + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:title + alias: title + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + version: + name: version + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:hasVersion + alias: version + owner: Information + domain_of: + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + was_derived_from: + name: was_derived_from + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - dcterms:source + slot_uri: 
prov:wasDerivedFrom + alias: was_derived_from + owner: Information + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + id: + name: id + description: A unique identifier for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + identifier: true + alias: id + owner: Information + domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:name + alias: name + owner: Information + domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - range: datetime - keywords: - name: keywords - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:keyword - alias: keywords - owner: Information - domain_of: + - Organization + - Software + - Person - Information - - DatasetCollection - - Dataset - - DataSubset + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - multivalued: true - language: - name: language - description: language in which the information is expressed - from_schema: 
https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - schema:inLanguage - slot_uri: dcterms:language - alias: language + description: + name: description + description: A human-readable description for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:description + alias: description owner: Information domain_of: - - Information + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - last_updated_on: - name: last_updated_on - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:modified - alias: 
last_updated_on - owner: Information + FormatDialect: + name: FormatDialect + description: Additional format information for a file + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + attributes: + comment_prefix: + name: comment_prefix + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + alias: comment_prefix + owner: FormatDialect domain_of: - - Information - - DatasetCollection - - Dataset - - DataSubset - range: datetime - license: - name: license - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:license - alias: license - owner: Information + - FormatDialect + range: string + delimiter: + name: delimiter + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + alias: delimiter + owner: FormatDialect domain_of: - - Software - - Information - - DatasetCollection - - Dataset - - DataSubset + - FormatDialect range: string - modified_by: - name: modified_by - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:contributor - alias: modified_by - owner: Information + double_quote: + name: double_quote + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + alias: double_quote + owner: FormatDialect domain_of: - - Information - - DatasetCollection - - Dataset - - DataSubset + - FormatDialect range: string - page: - name: page - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcat:landingPage - alias: page - owner: Information + header: + name: header + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + alias: header + owner: FormatDialect + domain_of: + - FormatDialect + range: string + quote_char: + name: quote_char + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + alias: quote_char + owner: FormatDialect + domain_of: + - FormatDialect + range: string + Purpose: + name: Purpose + description: For what purpose was the dataset created? 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: DatasetProperty + attributes: + response: + name: response + description: Short explanation describing the primary purpose of creating + the dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + slot_uri: dcterms:description + alias: response + owner: Purpose domain_of: - - Information - - DatasetCollection - - Dataset - - DataSubset + - Purpose + - Task + - AddressingGap range: string - publisher: - name: publisher - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:publisher - alias: publisher - owner: Information + id: + name: id + description: An optional identifier for this property. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + alias: id + owner: Purpose domain_of: - - Information - DatasetCollection - Dataset - DataSubset + - NamedThing + - Organization + - DatasetProperty + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview 
+ - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: uriorcurie - status: - name: status - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:type - alias: status - owner: Information + name: + name: name + description: A human-readable name for this property. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:name + alias: name + owner: Purpose domain_of: - - Information - DatasetCollection - Dataset - DataSubset + - NamedThing + - Organization + - DatasetProperty + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - 
HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: string - title: - name: title - description: the official title of the element - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:title - alias: title - owner: Information + description: + name: description + description: A human-readable description for this property. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:description + alias: description + owner: Purpose domain_of: - - Information - DatasetCollection - Dataset - DataSubset + - NamedThing + - Organization + - DatasetProperty + - DatasetRelationship + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - 
ExportControlRegulatoryRestrictions range: string - version: - name: version - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:hasVersion - alias: version - owner: Information + used_software: + name: used_software + description: What software was used as part of this dataset property? + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: d4d:usedSoftware + alias: used_software + owner: Purpose domain_of: - - Software - - Information - - DatasetCollection - - Dataset - - DataSubset - range: string - was_derived_from: - name: was_derived_from - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - dcterms:source - slot_uri: prov:wasDerivedFrom - alias: was_derived_from - owner: Information + - DatasetProperty + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - 
LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: Software + multivalued: true + inlined_as_list: true + Task: + name: Task + description: Was there a specific task in mind for the dataset's application? + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: DatasetProperty + attributes: + response: + name: response + description: Short explanation describing the specific task or tasks for which + this dataset was created. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + slot_uri: dcterms:description + alias: response + owner: Task domain_of: - - Information - - DatasetCollection - - Dataset - - DataSubset + - Purpose + - Task + - AddressingGap range: string id: name: id - description: A unique identifier for a thing. + description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier - identifier: true alias: id - owner: Information + owner: Task domain_of: - - NamedThing - - DatasetProperty - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -5240,22 +7274,80 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - 
ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: uriorcurie - required: true name: name: name - description: A human-readable name for a thing. + description: A human-readable name for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Information + owner: Task domain_of: - - NamedThing - - DatasetProperty - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -5265,22 +7357,81 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - 
ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: string description: name: description - description: A human-readable description for a thing. + description: A human-readable description for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Information + owner: Task domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty + - DatasetRelationship - Software - Person - Information @@ -5290,66 +7441,156 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - 
ExportControlRegulatoryRestrictions range: string - FormatDialect: - name: FormatDialect - description: Additional format information for a file - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - attributes: - comment_prefix: - name: comment_prefix - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - alias: comment_prefix - owner: FormatDialect - domain_of: - - FormatDialect - range: string - delimiter: - name: delimiter - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - alias: delimiter - owner: FormatDialect - domain_of: - - FormatDialect - range: string - double_quote: - name: double_quote - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - alias: double_quote - owner: FormatDialect - domain_of: - - FormatDialect - range: string - header: - name: header - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - alias: header - owner: FormatDialect - domain_of: - - FormatDialect - range: string - quote_char: - name: quote_char + used_software: + name: used_software + description: What software was used as part of this dataset property? from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - alias: quote_char - owner: FormatDialect + slot_uri: d4d:usedSoftware + alias: used_software + owner: Task domain_of: - - FormatDialect - range: string - Purpose: - name: Purpose - description: For what purpose was the dataset created? 
+ - DatasetProperty + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: Software + multivalued: true + inlined: true + inlined_as_list: true + AddressingGap: + name: AddressingGap + description: Was there a specific gap that needed to be filled by creation of + the dataset? from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: response: name: response - description: Short explanation describing the primary purpose of creating - the dataset. + description: Short explanation of the knowledge or resource gap that this + dataset was intended to address. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation slot_uri: dcterms:description alias: response - owner: Purpose + owner: AddressingGap domain_of: - Purpose - Task @@ -5361,7 +7602,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Purpose + owner: AddressingGap domain_of: - DatasetCollection - Dataset @@ -5444,7 +7685,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Purpose + owner: AddressingGap domain_of: - DatasetCollection - Dataset @@ -5527,7 +7768,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Purpose + owner: AddressingGap domain_of: - DatasetCollection - Dataset @@ -5610,7 +7851,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Purpose + owner: AddressingGap domain_of: - DatasetProperty - Purpose @@ -5678,33 +7919,65 @@ classes: - ExportControlRegulatoryRestrictions range: Software multivalued: true + inlined: true inlined_as_list: true - Task: - name: Task - description: Was there a specific task in mind for the dataset's application? + Creator: + name: Creator + description: 'Who created the dataset (e.g., which team, research group) and on + behalf of which entity (e.g., company, institution, organization)? This may + also be considered a team. + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - response: - name: response - description: Short explanation describing the specific task or tasks for which - this dataset was created. + principal_investigator: + name: principal_investigator + description: A key individual (Principal Investigator) responsible for or + overseeing dataset creation. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + exact_mappings: + - schema:creator + slot_uri: dcterms:creator + alias: principal_investigator + owner: Creator + domain_of: + - Creator + range: Person + affiliations: + name: affiliations + description: Organizations with which the creator or team is affiliated. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + slot_uri: schema:affiliation + alias: affiliations + owner: Creator + domain_of: + - Creator + range: Organization + multivalued: true + inlined_as_list: true + credit_roles: + name: credit_roles + description: 'Contributor roles using the CRediT (Contributor Roles Taxonomy) + for the principal investigator or creator team. Specifies the specific contributions + made to this dataset (e.g., Conceptualization, Data Curation, Methodology). + Note: roles are specified here rather than on Person directly, since the + same person may have different roles across different datasets.' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: dcterms:description - alias: response - owner: Task + slot_uri: d4d:creditRoles + alias: credit_roles + owner: Creator domain_of: - - Purpose - - Task - - AddressingGap - range: string + - Creator + range: CRediTRoleEnum + multivalued: true id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Task + owner: Creator domain_of: - DatasetCollection - Dataset @@ -5787,7 +8060,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Task + owner: Creator domain_of: - DatasetCollection - Dataset @@ -5870,7 +8143,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Task + owner: Creator domain_of: - DatasetCollection - Dataset @@ -5953,7 +8226,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Task + owner: Creator domain_of: - DatasetProperty - Purpose @@ -6023,33 +8296,46 @@ classes: multivalued: true inlined: true inlined_as_list: true - AddressingGap: - name: AddressingGap - description: Was there a specific gap that needed to be filled by creation of - the dataset? + FundingMechanism: + name: FundingMechanism + description: 'Who funded the creation of the dataset? If there is an associated + grant, please provide the name of the grantor and the grant name and number. + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - response: - name: response - description: Short explanation of the knowledge or resource gap that this - dataset was intended to address. + grantor: + name: grantor + description: Name/identifier of the organization providing monetary or resource + support. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: dcterms:description - alias: response - owner: AddressingGap + slot_uri: schema:funder + alias: grantor + owner: FundingMechanism domain_of: - - Purpose - - Task - - AddressingGap - range: string + - FundingMechanism + range: Grantor + grants: + name: grants + description: Grant mechanisms supporting dataset creation. 
Multiple grants + may fund a single dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + slot_uri: schema:funding + alias: grants + owner: FundingMechanism + domain_of: + - FundingMechanism + range: Grant + multivalued: true + inlined_as_list: true id: name: id description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: AddressingGap + owner: FundingMechanism domain_of: - DatasetCollection - Dataset @@ -6132,7 +8418,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: AddressingGap + owner: FundingMechanism domain_of: - DatasetCollection - Dataset @@ -6215,7 +8501,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: AddressingGap + owner: FundingMechanism domain_of: - DatasetCollection - Dataset @@ -6298,7 +8584,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: AddressingGap + owner: FundingMechanism domain_of: - DatasetProperty - Purpose @@ -6368,70 +8654,202 @@ classes: multivalued: true inlined: true inlined_as_list: true - Creator: - name: Creator - description: 'Who created the dataset (e.g., which team, research group) and on - behalf of which entity (e.g., company, institution, organization)? This may - also be considered a team. + Grantor: + name: Grantor + description: 'The name and/or identifier of the organization providing monetary + support or other resources supporting creation of the dataset. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: DatasetProperty + is_a: Organization attributes: - principal_investigator: - name: principal_investigator - description: A key individual (Principal Investigator) responsible for or - overseeing dataset creation. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - exact_mappings: - - schema:creator - slot_uri: dcterms:creator - alias: principal_investigator - owner: Creator - domain_of: - - Creator - range: Person - affiliations: - name: affiliations - description: Organizations with which the creator or team is affiliated. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: schema:affiliation - alias: affiliations - owner: Creator + id: + name: id + description: A unique identifier for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + identifier: true + alias: id + owner: Grantor domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap - Creator - range: Organization - multivalued: true - inlined_as_list: true - credit_roles: - name: credit_roles - description: 'Contributor roles using the CRediT (Contributor Roles Taxonomy) - for the principal investigator or creator team. Specifies the specific contributions - made to this dataset (e.g., Conceptualization, Data Curation, Methodology). - Note: roles are specified here rather than on Person directly, since the - same person may have different roles across different datasets.' 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: d4d:creditRoles - alias: credit_roles - owner: Creator + - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:name + alias: name + owner: Grantor domain_of: + - NamedThing + - DatasetProperty + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information + - Purpose + - Task + - AddressingGap - Creator - range: CRediTRoleEnum - multivalued: true - id: - name: id - description: An optional identifier for this property. 
+ - FundingMechanism + - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + - VariableMetadata + - File + range: string + description: + name: description + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - alias: id - owner: Creator + slot_uri: schema:description + alias: description + owner: Grantor domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -6456,7 +8874,6 @@ classes: - Subpopulation - Deidentification - SensitiveElement - - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -6500,21 +8917,43 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: uriorcurie - name: - name: name - description: A human-readable name for this property. + - VariableMetadata + - File + range: string + Grant: + name: Grant + description: 'The name and/or identifier of the specific mechanism providing monetary + support or other resources supporting creation of the dataset. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: NamedThing + attributes: + grant_number: + name: grant_number + description: The alphanumeric identifier for the grant. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation + slot_uri: schema:identifier + alias: grant_number + owner: Grant + domain_of: + - Grant + range: string + id: + name: id + description: A unique identifier for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: Creator + slot_uri: schema:identifier + identifier: true + alias: id + owner: Grant domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -6583,22 +9022,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: string - description: - name: description - description: A human-readable description for this property. + - VariableMetadata + - File + range: uriorcurie + required: true + name: + name: name + description: A human-readable name for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: Creator + slot_uri: schema:name + alias: name + owner: Grant domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - - DatasetRelationship - Software - Person - Information @@ -6623,6 +9064,7 @@ classes: - Subpopulation - Deidentification - SensitiveElement + - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -6666,21 +9108,34 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - used_software: - name: used_software - description: What software was used as part of this dataset property? + description: + name: description + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: d4d:usedSoftware - alias: used_software - owner: Creator + slot_uri: schema:description + alias: description + owner: Grant domain_of: + - NamedThing - DatasetProperty + - DatasetRelationship + - DatasetCollection + - Dataset + - DataSubset + - Organization + - Software + - Person + - Information - Purpose - Task - AddressingGap - Creator - FundingMechanism + - Grantor + - Grant - Instance - SamplingStrategy - MissingInfo @@ -6695,7 +9150,6 @@ classes: - Subpopulation - Deidentification - SensitiveElement - - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -6739,50 +9193,130 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: Software - multivalued: true - inlined: true - inlined_as_list: true - FundingMechanism: - name: FundingMechanism - description: 'Who funded the creation of the dataset? If there is an associated - grant, please provide the name of the grantor and the grant name and number. + - VariableMetadata + - File + range: string + Instance: + name: Instance + description: 'What do the instances that comprise the dataset represent (e.g., + documents, photos, people, countries)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - grantor: - name: grantor - description: Name/identifier of the organization providing monetary or resource - support. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: schema:funder - alias: grantor - owner: FundingMechanism + data_topic: + name: data_topic + description: 'General topic of each instance (e.g., from Bridge2AI standards). 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + values_from: + - B2AI_TOPIC + slot_uri: dcat:theme + alias: data_topic + owner: Instance domain_of: - - FundingMechanism - range: Grantor - grants: - name: grants - description: Grant mechanisms supporting dataset creation. Multiple grants - may fund a single dataset. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: schema:funding - alias: grants - owner: FundingMechanism + - Instance + range: uriorcurie + instance_type: + name: instance_type + description: 'Multiple types of instances? (e.g., movies, users, and ratings). + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: dcterms:type + alias: instance_type + owner: Instance + domain_of: + - Instance + range: string + data_substrate: + name: data_substrate + description: 'Type of data (e.g., raw text, images) from Bridge2AI standards. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + values_from: + - B2AI_SUBSTRATE + slot_uri: dcterms:format + alias: data_substrate + owner: Instance + domain_of: + - Instance + range: uriorcurie + counts: + name: counts + description: 'How many instances are there in total (of each type, if appropriate)? + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: schema:numberOfItems + alias: counts + owner: Instance + domain_of: + - Instance + range: integer + label: + name: label + description: 'Is there a label or target associated with each instance? + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:hasLabel + alias: label + owner: Instance + domain_of: + - Instance + range: boolean + label_description: + name: label_description + description: 'If labeled, what pattern or format do labels follow? 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: schema:description + alias: label_description + owner: Instance + domain_of: + - Instance + range: string + sampling_strategies: + name: sampling_strategies + description: 'References to one or more SamplingStrategy objects. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:samplingStrategies + alias: sampling_strategies + owner: Instance + domain_of: + - Dataset + - DataSubset + - Instance + range: SamplingStrategy + multivalued: true + missing_information: + name: missing_information + description: 'References to one or more MissingInfo objects describing missing + data. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:missingInformation + alias: missing_information + owner: Instance domain_of: - - FundingMechanism - range: Grant + - Instance + range: MissingInfo multivalued: true - inlined_as_list: true id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: FundingMechanism + owner: Instance domain_of: - DatasetCollection - Dataset @@ -6865,7 +9399,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: FundingMechanism + owner: Instance domain_of: - DatasetCollection - Dataset @@ -6948,7 +9482,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: FundingMechanism + owner: Instance domain_of: - DatasetCollection - Dataset @@ -7031,7 +9565,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: FundingMechanism + owner: Instance domain_of: - DatasetProperty - Purpose @@ -7101,124 +9635,120 @@ classes: multivalued: true inlined: true inlined_as_list: true - Grantor: - name: Grantor - description: 'The name and/or identifier of the organization providing monetary - support or other resources supporting creation of the dataset. + SamplingStrategy: + name: SamplingStrategy + description: 'Does the dataset contain all possible instances, or is it a sample + (not necessarily random) of instances from a larger set? If so, how representative + is it? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: Organization + is_a: DatasetProperty attributes: - id: - name: id - description: A unique identifier for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - identifier: true - alias: id - owner: Grantor + is_sample: + name: is_sample + description: Indicates whether it is a sample of a larger set. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:isSample + alias: is_sample + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor - range: uriorcurie - required: true - name: - name: name - description: A human-readable name for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: Grantor + - SamplingStrategy + range: boolean + multivalued: true + is_random: + name: is_random + description: Indicates whether the sample is random. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:isRandom + alias: is_random + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor + - SamplingStrategy + range: boolean + multivalued: true + source_data: + name: source_data + description: 'Description of the larger set from which the sample was drawn, + if any. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:sourceData + alias: source_data + owner: SamplingStrategy + domain_of: + - SamplingStrategy range: string - description: - name: description - description: A human-readable description for a thing. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + multivalued: true + is_representative: + name: is_representative + description: 'Indicates whether the sample is representative of the larger + set. 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:isRepresentative + alias: is_representative + owner: SamplingStrategy + domain_of: + - SamplingStrategy + range: boolean + multivalued: true + representative_verification: + name: representative_verification + description: 'Explanation of how representativeness was validated or verified. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: schema:description - alias: description - owner: Grantor + alias: representative_verification + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - - DatasetCollection - - Dataset - - DataSubset - - Organization - - Software - - Person - - Information - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Grantor + - SamplingStrategy range: string - Grant: - name: Grant - description: 'The name and/or identifier of the specific mechanism providing monetary - support or other resources supporting creation of the dataset. + multivalued: true + why_not_representative: + name: why_not_representative + description: 'Explanation of why the sample is not representative, if applicable. - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: NamedThing - attributes: - grant_number: - name: grant_number - description: The alphanumeric identifier for the grant. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/motivation - slot_uri: schema:identifier - alias: grant_number - owner: Grant + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:whyNotRepresentative + alias: why_not_representative + owner: SamplingStrategy domain_of: - - Grant + - SamplingStrategy + range: string + multivalued: true + strategies: + name: strategies + description: 'Description of the sampling strategy (deterministic, probabilistic, + etc.). 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:strategies + alias: strategies + owner: SamplingStrategy + domain_of: + - SamplingStrategy range: string + multivalued: true id: name: id - description: A unique identifier for a thing. + description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier - identifier: true alias: id - owner: Grant + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -7228,22 +9758,80 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: uriorcurie - required: true name: name: name 
- description: A human-readable name for a thing. + description: A human-readable name for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Grant + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty - Software - Person - Information @@ -7253,22 +9841,81 @@ classes: - Creator - FundingMechanism - Grantor + - Grant + - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: string description: name: description - description: A human-readable description for a thing. + description: A human-readable description for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Grant + owner: SamplingStrategy domain_of: - - NamedThing - - DatasetProperty - - DatasetRelationship - DatasetCollection - Dataset - DataSubset + - NamedThing - Organization + - DatasetProperty + - DatasetRelationship - Software - Person - Information @@ -7278,120 +9925,175 @@ classes: - Creator - FundingMechanism - Grantor - range: string - Instance: - name: Instance - description: 'What do the instances that comprise the dataset represent (e.g., - documents, photos, people, countries)? - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: DatasetProperty - attributes: - data_topic: - name: data_topic - description: 'General topic of each instance (e.g., from Bridge2AI standards). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - values_from: - - B2AI_TOPIC - slot_uri: dcat:theme - alias: data_topic - owner: Instance - domain_of: - - Instance - range: uriorcurie - instance_type: - name: instance_type - description: 'Multiple types of instances? (e.g., movies, users, and ratings). 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:type - alias: instance_type - owner: Instance - domain_of: + - Grant - Instance + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions range: string - data_substrate: - name: data_substrate - description: 'Type of data (e.g., raw text, images) from Bridge2AI standards. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - values_from: - - B2AI_SUBSTRATE - slot_uri: dcterms:format - alias: data_substrate - owner: Instance - domain_of: - - Instance - range: uriorcurie - counts: - name: counts - description: 'How many instances are there in total (of each type, if appropriate)? 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:numberOfItems - alias: counts - owner: Instance + used_software: + name: used_software + description: What software was used as part of this dataset property? + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: d4d:usedSoftware + alias: used_software + owner: SamplingStrategy domain_of: + - DatasetProperty + - Purpose + - Task + - AddressingGap + - Creator + - FundingMechanism - Instance - range: integer - label: - name: label - description: 'Is there a label or target associated with each instance? + - SamplingStrategy + - MissingInfo + - Relationships + - Splits + - DataAnomaly + - DatasetBias + - DatasetLimitation + - ExternalResource + - Confidentiality + - ContentWarning + - Subpopulation + - Deidentification + - SensitiveElement + - DatasetRelationship + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: Software + multivalued: true + inlined: true + inlined_as_list: true + MissingInfo: + name: MissingInfo + description: 'Is any information missing from individual instances? 
(e.g., unavailable + data) - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:hasLabel - alias: label - owner: Instance - domain_of: - - Instance - range: boolean - label_description: - name: label_description - description: 'If labeled, what pattern or format do labels follow? + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: DatasetProperty + attributes: + missing: + name: missing + description: 'Description of the missing data fields or elements. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:description - alias: label_description - owner: Instance + slot_uri: dcterms:description + alias: missing + owner: MissingInfo domain_of: - - Instance + - MissingInfo range: string - sampling_strategies: - name: sampling_strategies - description: 'References to one or more SamplingStrategy objects. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:samplingStrategies - alias: sampling_strategies - owner: Instance - domain_of: - - Dataset - - DataSubset - - Instance - range: SamplingStrategy multivalued: true - missing_information: - name: missing_information - description: 'References to one or more MissingInfo objects describing missing - data. + why_missing: + name: why_missing + description: 'Explanation of why each piece of data is missing. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:missingInformation - alias: missing_information - owner: Instance + slot_uri: dcterms:description + alias: why_missing + owner: MissingInfo domain_of: - - Instance - range: MissingInfo + - MissingInfo + range: string multivalued: true id: name: id @@ -7399,7 +10101,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Instance + owner: MissingInfo domain_of: - DatasetCollection - Dataset @@ -7482,7 +10184,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Instance + owner: MissingInfo domain_of: - DatasetCollection - Dataset @@ -7565,7 +10267,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Instance + owner: MissingInfo domain_of: - DatasetCollection - Dataset @@ -7648,7 +10350,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Instance + owner: MissingInfo domain_of: - DatasetProperty - Purpose @@ -7718,104 +10420,27 @@ classes: multivalued: true inlined: true inlined_as_list: true - SamplingStrategy: - name: SamplingStrategy - description: 'Does the dataset contain all possible instances, or is it a sample - (not necessarily random) of instances from a larger set? If so, how representative - is it? + Relationships: + name: Relationships + description: 'Are relationships between individual instances made explicit (e.g., + users'' movie ratings, social network links)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - is_sample: - name: is_sample - description: Indicates whether it is a sample of a larger set. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:isSample - alias: is_sample - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: boolean - multivalued: true - is_random: - name: is_random - description: Indicates whether the sample is random. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:isRandom - alias: is_random - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: boolean - multivalued: true - source_data: - name: source_data - description: 'Description of the larger set from which the sample was drawn, - if any. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:sourceData - alias: source_data - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: string - multivalued: true - is_representative: - name: is_representative - description: 'Indicates whether the sample is representative of the larger - set. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:isRepresentative - alias: is_representative - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: boolean - multivalued: true - representative_verification: - name: representative_verification - description: 'Explanation of how representativeness was validated or verified. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:description - alias: representative_verification - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: string - multivalued: true - why_not_representative: - name: why_not_representative - description: 'Explanation of why the sample is not representative, if applicable. 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:whyNotRepresentative - alias: why_not_representative - owner: SamplingStrategy - domain_of: - - SamplingStrategy - range: string - multivalued: true - strategies: - name: strategies - description: 'Description of the sampling strategy (deterministic, probabilistic, - etc.). + relationship_details: + name: relationship_details + description: 'Details on relationships between instances (e.g., graph edges, + ratings). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:strategies - alias: strategies - owner: SamplingStrategy + slot_uri: dcterms:description + alias: relationship_details + owner: Relationships domain_of: - - SamplingStrategy + - Relationships range: string multivalued: true id: @@ -7824,7 +10449,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: SamplingStrategy + owner: Relationships domain_of: - DatasetCollection - Dataset @@ -7907,7 +10532,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: SamplingStrategy + owner: Relationships domain_of: - DatasetCollection - Dataset @@ -7990,7 +10615,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: SamplingStrategy + owner: Relationships domain_of: - DatasetCollection - Dataset @@ -8073,7 +10698,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: SamplingStrategy + owner: Relationships domain_of: - DatasetProperty - Purpose @@ -8143,39 +10768,26 @@ classes: multivalued: true inlined: true inlined_as_list: true - MissingInfo: - name: MissingInfo - description: 'Is any information missing from individual instances? 
(e.g., unavailable - data) + Splits: + name: Splits + description: 'Are there recommended data splits (e.g., training, validation, testing)? + If so, how are they defined and why? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - missing: - name: missing - description: 'Description of the missing data fields or elements. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:description - alias: missing - owner: MissingInfo - domain_of: - - MissingInfo - range: string - multivalued: true - why_missing: - name: why_missing - description: 'Explanation of why each piece of data is missing. + split_details: + name: split_details + description: 'Details on recommended data splits and their rationale. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: why_missing - owner: MissingInfo + alias: split_details + owner: Splits domain_of: - - MissingInfo + - Splits range: string multivalued: true id: @@ -8184,7 +10796,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: MissingInfo + owner: Splits domain_of: - DatasetCollection - Dataset @@ -8267,7 +10879,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: MissingInfo + owner: Splits domain_of: - DatasetCollection - Dataset @@ -8350,7 +10962,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: MissingInfo + owner: Splits domain_of: - DatasetCollection - Dataset @@ -8433,7 +11045,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: MissingInfo + owner: Splits domain_of: - DatasetProperty - Purpose @@ -8503,27 +11115,25 @@ classes: multivalued: true inlined: true inlined_as_list: 
true - Relationships: - name: Relationships - description: 'Are relationships between individual instances made explicit (e.g., - users'' movie ratings, social network links)? + DataAnomaly: + name: DataAnomaly + description: 'Are there any errors, sources of noise, or redundancies in the dataset? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - relationship_details: - name: relationship_details - description: 'Details on relationships between instances (e.g., graph edges, - ratings). + anomaly_details: + name: anomaly_details + description: 'Details on errors, noise sources, or redundancies in the dataset. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: relationship_details - owner: Relationships + alias: anomaly_details + owner: DataAnomaly domain_of: - - Relationships + - DataAnomaly range: string multivalued: true id: @@ -8532,7 +11142,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Relationships + owner: DataAnomaly domain_of: - DatasetCollection - Dataset @@ -8615,7 +11225,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Relationships + owner: DataAnomaly domain_of: - DatasetCollection - Dataset @@ -8698,7 +11308,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Relationships + owner: DataAnomaly domain_of: - DatasetCollection - Dataset @@ -8781,7 +11391,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Relationships + owner: DataAnomaly domain_of: - DatasetProperty - Purpose @@ -8851,26 +11461,68 @@ classes: multivalued: true inlined: true inlined_as_list: true - Splits: - name: Splits - description: 'Are there recommended data splits 
(e.g., training, validation, testing)? - If so, how are they defined and why? + DatasetBias: + name: DatasetBias + description: 'Documents known biases present in the dataset. Biases are systematic + errors or prejudices that may affect the representativeness or fairness of the + data. Distinct from anomalies (data quality issues) and limitations (scope constraints). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataBiases is_a: DatasetProperty attributes: - split_details: - name: split_details - description: 'Details on recommended data splits and their rationale. + bias_type: + name: bias_type + description: 'The type of bias identified, using standardized categories from + the Artificial Intelligence Ontology (AIO). + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:biasType + alias: bias_type + owner: DatasetBias + domain_of: + - DatasetBias + range: BiasTypeEnum + bias_description: + name: bias_description + description: 'Detailed description of how this bias manifests in the dataset, + including affected populations, features, or outcomes. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: split_details - owner: Splits + alias: bias_description + owner: DatasetBias domain_of: - - Splits + - DatasetBias + range: string + mitigation_strategy: + name: mitigation_strategy + description: 'Steps taken or recommended to mitigate this bias. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:mitigation_strategy + alias: mitigation_strategy + owner: DatasetBias + domain_of: + - DatasetBias + range: string + affected_subsets: + name: affected_subsets + description: 'Specific subsets or features of the dataset affected by this + bias. 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:affectedSubsets + alias: affected_subsets + owner: DatasetBias + domain_of: + - DatasetBias range: string multivalued: true id: @@ -8879,7 +11531,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Splits + owner: DatasetBias domain_of: - DatasetCollection - Dataset @@ -8962,7 +11614,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Splits + owner: DatasetBias domain_of: - DatasetCollection - Dataset @@ -9045,7 +11697,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Splits + owner: DatasetBias domain_of: - DatasetCollection - Dataset @@ -9128,7 +11780,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Splits + owner: DatasetBias domain_of: - DatasetProperty - Purpose @@ -9198,34 +11850,74 @@ classes: multivalued: true inlined: true inlined_as_list: true - DataAnomaly: - name: DataAnomaly - description: 'Are there any errors, sources of noise, or redundancies in the dataset? + DatasetLimitation: + name: DatasetLimitation + description: 'Documents known limitations of the dataset that may affect its use + or interpretation. Distinct from biases (systematic errors) and anomalies (data + quality issues). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataLimitations is_a: DatasetProperty attributes: - anomaly_details: - name: anomaly_details - description: 'Details on errors, noise sources, or redundancies in the dataset. + limitation_type: + name: limitation_type + description: 'Category of limitation (e.g., scope, coverage, temporal, methodological). 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:limitationType + alias: limitation_type + owner: DatasetLimitation + domain_of: + - DatasetLimitation + range: LimitationTypeEnum + limitation_description: + name: limitation_description + description: 'Detailed description of the limitation and its implications. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: anomaly_details - owner: DataAnomaly + alias: limitation_description + owner: DatasetLimitation domain_of: - - DataAnomaly + - DatasetLimitation + range: string + scope_impact: + name: scope_impact + description: 'How this limitation affects the scope or applicability of the + dataset. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:scopeImpact + alias: scope_impact + owner: DatasetLimitation + domain_of: + - DatasetLimitation + range: string + recommended_mitigation: + name: recommended_mitigation + description: 'Recommended approaches for users to address this limitation. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: d4d:recommendedMitigation + alias: recommended_mitigation + owner: DatasetLimitation + domain_of: + - DatasetLimitation range: string - multivalued: true id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DataAnomaly + owner: DatasetLimitation domain_of: - DatasetCollection - Dataset @@ -9308,7 +12000,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DataAnomaly + owner: DatasetLimitation domain_of: - DatasetCollection - Dataset @@ -9391,7 +12083,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DataAnomaly + owner: DatasetLimitation domain_of: - DatasetCollection - Dataset @@ -9474,7 +12166,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DataAnomaly + owner: DatasetLimitation domain_of: - DatasetProperty - Purpose @@ -9544,68 +12236,78 @@ classes: multivalued: true inlined: true inlined_as_list: true - DatasetBias: - name: DatasetBias - description: 'Documents known biases present in the dataset. Biases are systematic - errors or prejudices that may affect the representativeness or fairness of the - data. Distinct from anomalies (data quality issues) and limitations (scope constraints). + ExternalResource: + name: ExternalResource + description: 'Is the dataset self-contained or does it rely on external resources + (e.g., websites, other datasets)? If external, are there guarantees that those + resources will remain available and unchanged? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataBiases is_a: DatasetProperty + slots: + - external_resources + slot_usage: + external_resources: + name: external_resources + description: List of links or identifiers for external resources. + range: string attributes: - bias_type: - name: bias_type - description: 'The type of bias identified, using standardized categories from - the Artificial Intelligence Ontology (AIO). 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:biasType - alias: bias_type - owner: DatasetBias - domain_of: - - DatasetBias - range: BiasTypeEnum - bias_description: - name: bias_description - description: 'Detailed description of how this bias manifests in the dataset, - including affected populations, features, or outcomes. + future_guarantees: + name: future_guarantees + description: 'Explanation of any commitments that external resources will + remain available and stable over time. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: bias_description - owner: DatasetBias + alias: future_guarantees + owner: ExternalResource domain_of: - - DatasetBias + - ExternalResource range: string - mitigation_strategy: - name: mitigation_strategy - description: 'Steps taken or recommended to mitigate this bias. + multivalued: true + archival: + name: archival + description: 'Indication whether official archival versions of external resources + are included. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:mitigation_strategy - alias: mitigation_strategy - owner: DatasetBias + slot_uri: schema:archivedAt + alias: archival + owner: ExternalResource domain_of: - - DatasetBias - range: string - affected_subsets: - name: affected_subsets - description: 'Specific subsets or features of the dataset affected by this - bias. + - ExternalResource + range: boolean + multivalued: true + restrictions: + name: restrictions + description: 'Description of any restrictions or fees associated with external + resources. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:affectedSubsets - alias: affected_subsets - owner: DatasetBias + slot_uri: dcterms:accessRights + alias: restrictions + owner: ExternalResource domain_of: - - DatasetBias + - ExternalResource + - IPRestrictions + range: string + multivalued: true + external_resources: + name: external_resources + description: List of links or identifiers for external resources. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:references + alias: external_resources + owner: ExternalResource + domain_of: + - Dataset + - ExternalResource + - FileCollection + - DataSubset range: string multivalued: true id: @@ -9614,7 +12316,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DatasetBias + owner: ExternalResource domain_of: - DatasetCollection - Dataset @@ -9697,7 +12399,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DatasetBias + owner: ExternalResource domain_of: - DatasetCollection - Dataset @@ -9780,7 +12482,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DatasetBias + owner: ExternalResource domain_of: - DatasetCollection - Dataset @@ -9863,7 +12565,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DatasetBias + owner: ExternalResource domain_of: - DatasetProperty - Purpose @@ -9933,74 +12635,45 @@ classes: multivalued: true inlined: true inlined_as_list: true - DatasetLimitation: - name: DatasetLimitation - description: 'Documents known limitations of the dataset that may affect its use - or interpretation. Distinct from biases (systematic errors) and anomalies (data - quality issues). 
+ Confidentiality: + name: Confidentiality + description: 'Does the dataset contain data that might be confidential (e.g., + protected by legal privilege, patient data, non-public communications)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataLimitations is_a: DatasetProperty attributes: - limitation_type: - name: limitation_type - description: 'Category of limitation (e.g., scope, coverage, temporal, methodological). - - ' + confidential_elements_present: + name: confidential_elements_present + description: Indicates whether any confidential data elements are present. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:limitationType - alias: limitation_type - owner: DatasetLimitation + slot_uri: d4d:confidential_elements_present + alias: confidential_elements_present + owner: Confidentiality domain_of: - - DatasetLimitation - range: LimitationTypeEnum - limitation_description: - name: limitation_description - description: 'Detailed description of the limitation and its implications. + - Confidentiality + range: boolean + confidentiality_details: + name: confidentiality_details + description: 'Details on confidential data elements and handling procedures. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: limitation_description - owner: DatasetLimitation - domain_of: - - DatasetLimitation - range: string - scope_impact: - name: scope_impact - description: 'How this limitation affects the scope or applicability of the - dataset. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:scopeImpact - alias: scope_impact - owner: DatasetLimitation - domain_of: - - DatasetLimitation - range: string - recommended_mitigation: - name: recommended_mitigation - description: 'Recommended approaches for users to address this limitation. 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:recommendedMitigation - alias: recommended_mitigation - owner: DatasetLimitation + alias: confidentiality_details + owner: Confidentiality domain_of: - - DatasetLimitation + - Confidentiality range: string + multivalued: true id: name: id description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DatasetLimitation + owner: Confidentiality domain_of: - DatasetCollection - Dataset @@ -10083,7 +12756,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DatasetLimitation + owner: Confidentiality domain_of: - DatasetCollection - Dataset @@ -10166,7 +12839,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DatasetLimitation + owner: Confidentiality domain_of: - DatasetCollection - Dataset @@ -10249,7 +12922,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DatasetLimitation + owner: Confidentiality domain_of: - DatasetProperty - Purpose @@ -10319,77 +12992,33 @@ classes: multivalued: true inlined: true inlined_as_list: true - ExternalResource: - name: ExternalResource - description: 'Is the dataset self-contained or does it rely on external resources - (e.g., websites, other datasets)? If external, are there guarantees that those - resources will remain available and unchanged? + ContentWarning: + name: ContentWarning + description: 'Does the dataset contain any data that might be offensive, insulting, + threatening, or otherwise anxiety-provoking if viewed directly? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty - slots: - - external_resources - slot_usage: - external_resources: - name: external_resources - description: List of links or identifiers for external resources. - range: string attributes: - future_guarantees: - name: future_guarantees - description: 'Explanation of any commitments that external resources will - remain available and stable over time. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:description - alias: future_guarantees - owner: ExternalResource - domain_of: - - ExternalResource - range: string - multivalued: true - archival: - name: archival - description: 'Indication whether official archival versions of external resources - are included. - - ' + content_warnings_present: + name: content_warnings_present + description: Indicates whether any content warnings are needed. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:archivedAt - alias: archival - owner: ExternalResource + slot_uri: d4d:content_warnings_present + alias: content_warnings_present + owner: ContentWarning domain_of: - - ExternalResource + - ContentWarning range: boolean - multivalued: true - restrictions: - name: restrictions - description: 'Description of any restrictions or fees associated with external - resources. - - ' + warnings: + name: warnings from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:accessRights - alias: restrictions - owner: ExternalResource - domain_of: - - ExternalResource - - IPRestrictions - range: string - multivalued: true - external_resources: - name: external_resources - description: List of links or identifiers for external resources. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema - slot_uri: dcterms:references - alias: external_resources - owner: ExternalResource + slot_uri: dcterms:description + alias: warnings + owner: ContentWarning domain_of: - - Dataset - - ExternalResource - - DataSubset + - ContentWarning range: string multivalued: true id: @@ -10398,7 +13027,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ExternalResource + owner: ContentWarning domain_of: - DatasetCollection - Dataset @@ -10481,7 +13110,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ExternalResource + owner: ContentWarning domain_of: - DatasetCollection - Dataset @@ -10564,7 +13193,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ExternalResource + owner: ContentWarning domain_of: - DatasetCollection - Dataset @@ -10647,7 +13276,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ExternalResource + owner: ContentWarning domain_of: - DatasetProperty - Purpose @@ -10717,36 +13346,43 @@ classes: multivalued: true inlined: true inlined_as_list: true - Confidentiality: - name: Confidentiality - description: 'Does the dataset contain data that might be confidential (e.g., - protected by legal privilege, patient data, non-public communications)? + Subpopulation: + name: Subpopulation + description: 'Does the dataset identify any subpopulations (e.g., by age, gender)? + If so, how are they identified and what are their distributions? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - confidential_elements_present: - name: confidential_elements_present - description: Indicates whether any confidential data elements are present. 
+ subpopulation_elements_present: + name: subpopulation_elements_present + description: Indicates whether any subpopulations are explicitly identified. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:confidential_elements_present - alias: confidential_elements_present - owner: Confidentiality + slot_uri: d4d:subpopulationElementsPresent + alias: subpopulation_elements_present + owner: Subpopulation domain_of: - - Confidentiality + - Subpopulation range: boolean - confidentiality_details: - name: confidentiality_details - description: 'Details on confidential data elements and handling procedures. - - ' + identification: + name: identification from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: confidentiality_details - owner: Confidentiality + alias: identification + owner: Subpopulation domain_of: - - Confidentiality + - Subpopulation + range: string + multivalued: true + distribution: + name: distribution + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: dcterms:description + alias: distribution + owner: Subpopulation + domain_of: + - Subpopulation range: string multivalued: true id: @@ -10755,7 +13391,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Confidentiality + owner: Subpopulation domain_of: - DatasetCollection - Dataset @@ -10838,7 +13474,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Confidentiality + owner: Subpopulation domain_of: - DatasetCollection - Dataset @@ -10921,7 +13557,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Confidentiality + owner: Subpopulation domain_of: - DatasetCollection - Dataset @@ -11004,7 +13640,7 @@ classes: from_schema: 
https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Confidentiality + owner: Subpopulation domain_of: - DatasetProperty - Purpose @@ -11074,33 +13710,56 @@ classes: multivalued: true inlined: true inlined_as_list: true - ContentWarning: - name: ContentWarning - description: 'Does the dataset contain any data that might be offensive, insulting, - threatening, or otherwise anxiety-provoking if viewed directly? + Deidentification: + name: Deidentification + description: 'Is it possible to identify individuals in the dataset, either directly + or indirectly (in combination with other data)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - content_warnings_present: - name: content_warnings_present - description: Indicates whether any content warnings are needed. + identifiable_elements_present: + name: identifiable_elements_present + description: Indicates whether data subjects can be identified. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:content_warnings_present - alias: content_warnings_present - owner: ContentWarning + slot_uri: d4d:identifiableElementsPresent + alias: identifiable_elements_present + owner: Deidentification + domain_of: + - Deidentification + range: boolean + method: + name: method + description: Method used for de-identification (e.g., HIPAA Safe Harbor). + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + alias: method + owner: Deidentification + domain_of: + - Deidentification + range: string + identifiers_removed: + name: identifiers_removed + description: List of identifier types removed during de-identification. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + slot_uri: schema:identifier + alias: identifiers_removed + owner: Deidentification domain_of: - - ContentWarning - range: boolean - warnings: - name: warnings + - Deidentification + range: string + multivalued: true + deidentification_details: + name: deidentification_details + description: 'Details on de-identification procedures and residual risks. + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: warnings - owner: ContentWarning + alias: deidentification_details + owner: Deidentification domain_of: - - ContentWarning + - Deidentification range: string multivalued: true id: @@ -11109,7 +13768,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ContentWarning + owner: Deidentification domain_of: - DatasetCollection - Dataset @@ -11192,7 +13851,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ContentWarning + owner: Deidentification domain_of: - DatasetCollection - Dataset @@ -11275,7 +13934,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ContentWarning + owner: Deidentification domain_of: - DatasetCollection - Dataset @@ -11358,7 +14017,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ContentWarning + owner: Deidentification domain_of: - DatasetProperty - Purpose @@ -11428,43 +14087,38 @@ classes: multivalued: true inlined: true inlined_as_list: true - Subpopulation: - name: Subpopulation - description: 'Does the dataset identify any subpopulations (e.g., by age, gender)? - If so, how are they identified and what are their distributions? 
+ SensitiveElement: + name: SensitiveElement + description: 'Does the dataset contain data that might be considered sensitive + (e.g., race, sexual orientation, religion, biometrics)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:personalSensitiveInformation is_a: DatasetProperty attributes: - subpopulation_elements_present: - name: subpopulation_elements_present - description: Indicates whether any subpopulations are explicitly identified. + sensitive_elements_present: + name: sensitive_elements_present + description: Indicates whether sensitive data elements are present. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:subpopulationElementsPresent - alias: subpopulation_elements_present - owner: Subpopulation + slot_uri: d4d:sensitive_elements_present + alias: sensitive_elements_present + owner: SensitiveElement domain_of: - - Subpopulation + - SensitiveElement range: boolean - identification: - name: identification - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:description - alias: identification - owner: Subpopulation - domain_of: - - Subpopulation - range: string - multivalued: true - distribution: - name: distribution + sensitivity_details: + name: sensitivity_details + description: 'Details on sensitive data elements present and handling procedures. 
+ + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: dcterms:description - alias: distribution - owner: Subpopulation + alias: sensitivity_details + owner: SensitiveElement domain_of: - - Subpopulation + - SensitiveElement range: string multivalued: true id: @@ -11473,7 +14127,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Subpopulation + owner: SensitiveElement domain_of: - DatasetCollection - Dataset @@ -11556,7 +14210,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Subpopulation + owner: SensitiveElement domain_of: - DatasetCollection - Dataset @@ -11639,7 +14293,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Subpopulation + owner: SensitiveElement domain_of: - DatasetCollection - Dataset @@ -11722,7 +14376,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Subpopulation + owner: SensitiveElement domain_of: - DatasetProperty - Purpose @@ -11792,65 +14446,47 @@ classes: multivalued: true inlined: true inlined_as_list: true - Deidentification: - name: Deidentification - description: 'Is it possible to identify individuals in the dataset, either directly - or indirectly (in combination with other data)? + DatasetRelationship: + name: DatasetRelationship + description: 'Typed relationship to another dataset, enabling precise specification + of how datasets relate to each other (e.g., supplements, derives from, is version + of). Supports RO-Crate-style dataset interlinking. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - identifiable_elements_present: - name: identifiable_elements_present - description: Indicates whether data subjects can be identified. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:identifiableElementsPresent - alias: identifiable_elements_present - owner: Deidentification - domain_of: - - Deidentification - range: boolean - method: - name: method - description: Method used for de-identification (e.g., HIPAA Safe Harbor). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - alias: method - owner: Deidentification - domain_of: - - Deidentification - range: string - identifiers_removed: - name: identifiers_removed - description: List of identifier types removed during de-identification. + target_dataset: + name: target_dataset + description: The dataset that this relationship points to. Can be specified + by identifier, URL, or Dataset object. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition slot_uri: schema:identifier - alias: identifiers_removed - owner: Deidentification + alias: target_dataset + owner: DatasetRelationship domain_of: - - Deidentification + - DatasetRelationship range: string - multivalued: true - deidentification_details: - name: deidentification_details - description: 'Details on de-identification procedures and residual risks. - - ' + required: true + relationship_type: + name: relationship_type + description: The type of relationship (e.g., derives_from, supplements, is_version_of). + Uses DatasetRelationshipTypeEnum for standardized relationship types. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: dcterms:description - alias: deidentification_details - owner: Deidentification + slot_uri: schema:additionalType + alias: relationship_type + owner: DatasetRelationship domain_of: - - Deidentification - range: string - multivalued: true - id: - name: id - description: An optional identifier for this property. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - alias: id - owner: Deidentification + - DatasetRelationship + range: DatasetRelationshipTypeEnum + required: true + description: + name: description + description: Free-text description providing additional context about the + relationship. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + alias: description + owner: DatasetRelationship domain_of: - DatasetCollection - Dataset @@ -11883,57 +14519,14 @@ classes: - Deidentification - SensitiveElement - DatasetRelationship - - InstanceAcquisition - - CollectionMechanism - - DataCollector - - CollectionTimeframe - - DirectCollection - - MissingDataDocumentation - - RawDataSource - - PreprocessingStrategy - - CleaningStrategy - - LabelingStrategy - - RawData - - ImputationProtocol - - AnnotationAnalysis - - MachineAnnotationTools - - ExistingUse - - UseRepository - - OtherTask - - FutureUseImpact - - DiscouragedUse - - IntendedUse - - ProhibitedUse - - ThirdPartySharing - - DistributionFormat - - DistributionDate - - Maintainer - - Erratum - - UpdatePlan - - RetentionLimits - - VersionAccess - - ExtensionMechanism - - EthicalReview - - DataProtectionImpact - - CollectionNotification - - CollectionConsent - - ConsentRevocation - - HumanSubjectResearch - - InformedConsent - - ParticipantPrivacy - - HumanSubjectCompensation - - AtRiskPopulations - - LicenseAndUseTerms - - IPRestrictions - - ExportControlRegulatoryRestrictions - range: uriorcurie - name: - name: name - description: A human-readable name for this property. + range: string + id: + name: id + description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: Deidentification + slot_uri: schema:identifier + alias: id + owner: DatasetRelationship domain_of: - DatasetCollection - Dataset @@ -12009,14 +14602,14 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: string - description: - name: description - description: A human-readable description for this property. + range: uriorcurie + name: + name: name + description: A human-readable name for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:description - alias: description - owner: Deidentification + slot_uri: schema:name + alias: name + owner: DatasetRelationship domain_of: - DatasetCollection - Dataset @@ -12024,7 +14617,6 @@ classes: - NamedThing - Organization - DatasetProperty - - DatasetRelationship - Software - Person - Information @@ -12049,6 +14641,7 @@ classes: - Subpopulation - Deidentification - SensitiveElement + - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -12099,7 +14692,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Deidentification + owner: DatasetRelationship domain_of: - DatasetProperty - Purpose @@ -12169,38 +14762,66 @@ classes: multivalued: true inlined: true inlined_as_list: true - SensitiveElement: - name: SensitiveElement - description: 'Does the dataset contain data that might be considered sensitive - (e.g., race, sexual orientation, religion, biometrics)? + InstanceAcquisition: + name: InstanceAcquisition + description: 'Describes how data associated with each instance was acquired (e.g., + directly observed, reported by subjects, inferred). 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:personalSensitiveInformation is_a: DatasetProperty - attributes: - sensitive_elements_present: - name: sensitive_elements_present - description: Indicates whether sensitive data elements are present. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: d4d:sensitive_elements_present - alias: sensitive_elements_present - owner: SensitiveElement + attributes: + was_directly_observed: + name: was_directly_observed + description: Whether the data was directly observed + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:wasDirectlyObserved + alias: was_directly_observed + owner: InstanceAcquisition domain_of: - - SensitiveElement + - InstanceAcquisition range: boolean - sensitivity_details: - name: sensitivity_details - description: 'Details on sensitive data elements present and handling procedures. + was_reported_by_subjects: + name: was_reported_by_subjects + description: Whether the data was reported directly by the subjects themselves + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:wasReportedBySubjects + alias: was_reported_by_subjects + owner: InstanceAcquisition + domain_of: + - InstanceAcquisition + range: boolean + was_inferred_derived: + name: was_inferred_derived + description: Whether the data was inferred or derived from other data + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:wasInferred + alias: was_inferred_derived + owner: InstanceAcquisition + domain_of: + - InstanceAcquisition + range: boolean + was_validated_verified: + name: was_validated_verified + description: Whether the data was validated or verified in any way + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:wasValidated + alias: was_validated_verified + owner: InstanceAcquisition + domain_of: + - InstanceAcquisition + 
range: boolean + acquisition_details: + name: acquisition_details + description: 'Details on how data was acquired for each instance. ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection slot_uri: dcterms:description - alias: sensitivity_details - owner: SensitiveElement + alias: acquisition_details + owner: InstanceAcquisition domain_of: - - SensitiveElement + - InstanceAcquisition range: string multivalued: true id: @@ -12209,7 +14830,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: SensitiveElement + owner: InstanceAcquisition domain_of: - DatasetCollection - Dataset @@ -12292,7 +14913,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: SensitiveElement + owner: InstanceAcquisition domain_of: - DatasetCollection - Dataset @@ -12375,7 +14996,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: SensitiveElement + owner: InstanceAcquisition domain_of: - DatasetCollection - Dataset @@ -12458,7 +15079,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: SensitiveElement + owner: InstanceAcquisition domain_of: - DatasetProperty - Purpose @@ -12528,47 +15149,38 @@ classes: multivalued: true inlined: true inlined_as_list: true - DatasetRelationship: - name: DatasetRelationship - description: 'Typed relationship to another dataset, enabling precise specification - of how datasets relate to each other (e.g., supplements, derives from, is version - of). Supports RO-Crate-style dataset interlinking. + CollectionMechanism: + name: CollectionMechanism + description: 'What mechanisms or procedures were used to collect the data (e.g., + hardware, manual curation, software APIs)? 
Also covers how these mechanisms + were validated. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataCollection is_a: DatasetProperty attributes: - target_dataset: - name: target_dataset - description: The dataset that this relationship points to. Can be specified - by identifier, URL, or Dataset object. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:identifier - alias: target_dataset - owner: DatasetRelationship + mechanism_details: + name: mechanism_details + description: 'Details on mechanisms or procedures used to collect the data. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: dcterms:description + alias: mechanism_details + owner: CollectionMechanism domain_of: - - DatasetRelationship + - CollectionMechanism range: string - required: true - relationship_type: - name: relationship_type - description: The type of relationship (e.g., derives_from, supplements, is_version_of). - Uses DatasetRelationshipTypeEnum for standardized relationship types. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - slot_uri: schema:additionalType - alias: relationship_type - owner: DatasetRelationship - domain_of: - - DatasetRelationship - range: DatasetRelationshipTypeEnum - required: true - description: - name: description - description: Free-text description providing additional context about the - relationship. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/composition - alias: description - owner: DatasetRelationship + multivalued: true + id: + name: id + description: An optional identifier for this property. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base + slot_uri: schema:identifier + alias: id + owner: CollectionMechanism domain_of: - DatasetCollection - Dataset @@ -12601,14 +15213,57 @@ classes: - Deidentification - SensitiveElement - DatasetRelationship - range: string - id: - name: id - description: An optional identifier for this property. + - InstanceAcquisition + - CollectionMechanism + - DataCollector + - CollectionTimeframe + - DirectCollection + - MissingDataDocumentation + - RawDataSource + - PreprocessingStrategy + - CleaningStrategy + - LabelingStrategy + - RawData + - ImputationProtocol + - AnnotationAnalysis + - MachineAnnotationTools + - ExistingUse + - UseRepository + - OtherTask + - FutureUseImpact + - DiscouragedUse + - IntendedUse + - ProhibitedUse + - ThirdPartySharing + - DistributionFormat + - DistributionDate + - Maintainer + - Erratum + - UpdatePlan + - RetentionLimits + - VersionAccess + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: uriorcurie + name: + name: name + description: A human-readable name for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:identifier - alias: id - owner: DatasetRelationship + slot_uri: schema:name + alias: name + owner: CollectionMechanism domain_of: - DatasetCollection - Dataset @@ -12684,14 +15339,14 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: uriorcurie - name: - name: name - description: A human-readable name for this property. + range: string + description: + name: description + description: A human-readable description for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: schema:name - alias: name - owner: DatasetRelationship + slot_uri: schema:description + alias: description + owner: CollectionMechanism domain_of: - DatasetCollection - Dataset @@ -12699,6 +15354,7 @@ classes: - NamedThing - Organization - DatasetProperty + - DatasetRelationship - Software - Person - Information @@ -12723,7 +15379,6 @@ classes: - Subpopulation - Deidentification - SensitiveElement - - DatasetRelationship - InstanceAcquisition - CollectionMechanism - DataCollector @@ -12774,7 +15429,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DatasetRelationship + owner: CollectionMechanism domain_of: - DatasetProperty - Purpose @@ -12844,66 +15499,37 @@ classes: multivalued: true inlined: true inlined_as_list: true - InstanceAcquisition: - name: InstanceAcquisition - description: 'Describes how data associated with each instance was acquired (e.g., - directly observed, reported by subjects, inferred). + DataCollector: + name: DataCollector + description: 'Who was involved in the data collection (e.g., students, crowdworkers, + contractors), and how they were compensated. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - was_directly_observed: - name: was_directly_observed - description: Whether the data was directly observed - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:wasDirectlyObserved - alias: was_directly_observed - owner: InstanceAcquisition - domain_of: - - InstanceAcquisition - range: boolean - was_reported_by_subjects: - name: was_reported_by_subjects - description: Whether the data was reported directly by the subjects themselves - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:wasReportedBySubjects - alias: was_reported_by_subjects - owner: InstanceAcquisition - domain_of: - - InstanceAcquisition - range: boolean - was_inferred_derived: - name: was_inferred_derived - description: Whether the data was inferred or derived from other data - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:wasInferred - alias: was_inferred_derived - owner: InstanceAcquisition - domain_of: - - InstanceAcquisition - range: boolean - was_validated_verified: - name: was_validated_verified - description: Whether the data was validated or verified in any way + role: + name: role + description: Role of the data collector (e.g., researcher, crowdworker) from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:wasValidated - alias: was_validated_verified - owner: InstanceAcquisition + slot_uri: schema:roleName + alias: role + owner: DataCollector domain_of: - - InstanceAcquisition - range: boolean - acquisition_details: - name: acquisition_details - description: 'Details on how data was acquired for each instance. + - DataCollector + - Maintainer + range: string + collector_details: + name: collector_details + description: 'Details on who collected the data and their compensation. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection slot_uri: dcterms:description - alias: acquisition_details - owner: InstanceAcquisition + alias: collector_details + owner: DataCollector domain_of: - - InstanceAcquisition + - DataCollector range: string multivalued: true id: @@ -12912,7 +15538,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: InstanceAcquisition + owner: DataCollector domain_of: - DatasetCollection - Dataset @@ -12995,7 +15621,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: InstanceAcquisition + owner: DataCollector domain_of: - DatasetCollection - Dataset @@ -13078,7 +15704,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: InstanceAcquisition + owner: DataCollector domain_of: - DatasetCollection - Dataset @@ -13161,7 +15787,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: InstanceAcquisition + owner: DataCollector domain_of: - DatasetProperty - Purpose @@ -13231,29 +15857,49 @@ classes: multivalued: true inlined: true inlined_as_list: true - CollectionMechanism: - name: CollectionMechanism - description: 'What mechanisms or procedures were used to collect the data (e.g., - hardware, manual curation, software APIs)? Also covers how these mechanisms - were validated. + CollectionTimeframe: + name: CollectionTimeframe + description: 'Over what timeframe was the data collected, and does this timeframe + match the creation timeframe of the underlying data? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataCollection + - rai:dataCollectionTimeframe is_a: DatasetProperty attributes: - mechanism_details: - name: mechanism_details - description: 'Details on mechanisms or procedures used to collect the data. + start_date: + name: start_date + description: Start date of data collection + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: schema:startDate + alias: start_date + owner: CollectionTimeframe + domain_of: + - CollectionTimeframe + range: date + end_date: + name: end_date + description: End date of data collection + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: schema:endDate + alias: end_date + owner: CollectionTimeframe + domain_of: + - CollectionTimeframe + range: date + timeframe_details: + name: timeframe_details + description: 'Details on the collection timeframe and relationship to data + creation dates. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection slot_uri: dcterms:description - alias: mechanism_details - owner: CollectionMechanism + alias: timeframe_details + owner: CollectionTimeframe domain_of: - - CollectionMechanism + - CollectionTimeframe range: string multivalued: true id: @@ -13262,7 +15908,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: CollectionMechanism + owner: CollectionTimeframe domain_of: - DatasetCollection - Dataset @@ -13345,7 +15991,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: CollectionMechanism + owner: CollectionTimeframe domain_of: - DatasetCollection - Dataset @@ -13428,7 +16074,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: CollectionMechanism + owner: CollectionTimeframe domain_of: - DatasetCollection - Dataset 
@@ -13511,7 +16157,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: CollectionMechanism + owner: CollectionTimeframe domain_of: - DatasetProperty - Purpose @@ -13581,37 +16227,36 @@ classes: multivalued: true inlined: true inlined_as_list: true - DataCollector: - name: DataCollector - description: 'Who was involved in the data collection (e.g., students, crowdworkers, - contractors), and how they were compensated. + DirectCollection: + name: DirectCollection + description: 'Indicates whether the data was collected directly from the individuals + in question or obtained via third parties/other sources. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - role: - name: role - description: Role of the data collector (e.g., researcher, crowdworker) + is_direct: + name: is_direct + description: Whether collection was direct from individuals from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: schema:roleName - alias: role - owner: DataCollector + slot_uri: d4d:isDirect + alias: is_direct + owner: DirectCollection domain_of: - - DataCollector - - Maintainer - range: string - collector_details: - name: collector_details - description: 'Details on who collected the data and their compensation. + - DirectCollection + range: boolean + collection_details: + name: collection_details + description: 'Details on direct vs. indirect collection methods and sources. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection slot_uri: dcterms:description - alias: collector_details - owner: DataCollector + alias: collection_details + owner: DirectCollection domain_of: - - DataCollector + - DirectCollection range: string multivalued: true id: @@ -13620,7 +16265,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DataCollector + owner: DirectCollection domain_of: - DatasetCollection - Dataset @@ -13703,7 +16348,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DataCollector + owner: DirectCollection domain_of: - DatasetCollection - Dataset @@ -13786,7 +16431,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DataCollector + owner: DirectCollection domain_of: - DatasetCollection - Dataset @@ -13869,7 +16514,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DataCollector + owner: DirectCollection domain_of: - DatasetProperty - Purpose @@ -13939,58 +16584,65 @@ classes: multivalued: true inlined: true inlined_as_list: true - CollectionTimeframe: - name: CollectionTimeframe - description: 'Over what timeframe was the data collected, and does this timeframe - match the creation timeframe of the underlying data? + MissingDataDocumentation: + name: MissingDataDocumentation + description: 'Documentation of missing data in the dataset, including patterns, + causes, and strategies for handling missing values. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataCollectionTimeframe + - rai:dataCollectionMissingData is_a: DatasetProperty attributes: - start_date: - name: start_date - description: Start date of data collection + missing_data_patterns: + name: missing_data_patterns + description: 'Description of patterns in missing data (e.g., missing completely + at random, missing at random, missing not at random). + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: schema:startDate - alias: start_date - owner: CollectionTimeframe + slot_uri: d4d:missingDataPatterns + alias: missing_data_patterns + owner: MissingDataDocumentation domain_of: - - CollectionTimeframe - range: date - end_date: - name: end_date - description: End date of data collection + - MissingDataDocumentation + range: string + multivalued: true + missing_data_causes: + name: missing_data_causes + description: 'Known or suspected causes of missing data (e.g., sensor failures, + participant dropout, privacy constraints). + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: schema:endDate - alias: end_date - owner: CollectionTimeframe + slot_uri: d4d:missingDataCauses + alias: missing_data_causes + owner: MissingDataDocumentation domain_of: - - CollectionTimeframe - range: date - timeframe_details: - name: timeframe_details - description: 'Details on the collection timeframe and relationship to data - creation dates. + - MissingDataDocumentation + range: string + multivalued: true + handling_strategy: + name: handling_strategy + description: 'Strategy used to handle missing data (e.g., deletion, imputation, + flagging, multiple imputation). 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: dcterms:description - alias: timeframe_details - owner: CollectionTimeframe + slot_uri: d4d:handlingStrategy + alias: handling_strategy + owner: MissingDataDocumentation domain_of: - - CollectionTimeframe + - MissingDataDocumentation range: string - multivalued: true id: name: id description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: CollectionTimeframe + owner: MissingDataDocumentation domain_of: - DatasetCollection - Dataset @@ -14073,7 +16725,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: CollectionTimeframe + owner: MissingDataDocumentation domain_of: - DatasetCollection - Dataset @@ -14156,7 +16808,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: CollectionTimeframe + owner: MissingDataDocumentation domain_of: - DatasetCollection - Dataset @@ -14239,7 +16891,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: CollectionTimeframe + owner: MissingDataDocumentation domain_of: - DatasetProperty - Purpose @@ -14309,36 +16961,69 @@ classes: multivalued: true inlined: true inlined_as_list: true - DirectCollection: - name: DirectCollection - description: 'Indicates whether the data was collected directly from the individuals - in question or obtained via third parties/other sources. + RawDataSource: + name: RawDataSource + description: 'Description of raw data sources before preprocessing, cleaning, + or labeling. Documents where the original data comes from and how it can be + accessed. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataCollectionRawData is_a: DatasetProperty attributes: - is_direct: - name: is_direct - description: Whether collection was direct from individuals + source_description: + name: source_description + description: 'Detailed description of where raw data comes from (e.g., sensors, + databases, web APIs, manual collection). + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:isDirect - alias: is_direct - owner: DirectCollection + slot_uri: dcterms:description + alias: source_description + owner: RawDataSource domain_of: - - DirectCollection - range: boolean - collection_details: - name: collection_details - description: 'Details on direct vs. indirect collection methods and sources. + - RawDataSource + range: string + required: true + source_type: + name: source_type + description: 'Type of raw source (sensor, database, user input, web scraping, + etc.). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: dcterms:description - alias: collection_details - owner: DirectCollection + slot_uri: dcterms:type + alias: source_type + owner: RawDataSource + domain_of: + - RawDataSource + range: string + multivalued: true + access_details: + name: access_details + description: 'Information on how to access or retrieve the raw source data. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:accessDetails + alias: access_details + owner: RawDataSource + domain_of: + - RawDataSource + range: string + raw_data_format: + name: raw_data_format + description: 'Format of the raw data before any preprocessing. 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + slot_uri: d4d:rawDataFormat + alias: raw_data_format + owner: RawDataSource domain_of: - - DirectCollection + - RawDataSource range: string multivalued: true id: @@ -14347,7 +17032,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DirectCollection + owner: RawDataSource domain_of: - DatasetCollection - Dataset @@ -14430,7 +17115,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DirectCollection + owner: RawDataSource domain_of: - DatasetCollection - Dataset @@ -14513,7 +17198,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DirectCollection + owner: RawDataSource domain_of: - DatasetCollection - Dataset @@ -14596,7 +17281,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DirectCollection + owner: RawDataSource domain_of: - DatasetProperty - Purpose @@ -14666,65 +17351,37 @@ classes: multivalued: true inlined: true inlined_as_list: true - MissingDataDocumentation: - name: MissingDataDocumentation - description: 'Documentation of missing data in the dataset, including patterns, - causes, and strategies for handling missing values. + PreprocessingStrategy: + name: PreprocessingStrategy + description: 'Was any preprocessing of the data done (e.g., discretization or + bucketing, tokenization, SIFT feature extraction)? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataCollectionMissingData + - rai:dataPreprocessingProtocol is_a: DatasetProperty attributes: - missing_data_patterns: - name: missing_data_patterns - description: 'Description of patterns in missing data (e.g., missing completely - at random, missing at random, missing not at random). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:missingDataPatterns - alias: missing_data_patterns - owner: MissingDataDocumentation - domain_of: - - MissingDataDocumentation - range: string - multivalued: true - missing_data_causes: - name: missing_data_causes - description: 'Known or suspected causes of missing data (e.g., sensor failures, - participant dropout, privacy constraints). + preprocessing_details: + name: preprocessing_details + description: 'Details on preprocessing steps applied to the data. ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:missingDataCauses - alias: missing_data_causes - owner: MissingDataDocumentation + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: dcterms:description + alias: preprocessing_details + owner: PreprocessingStrategy domain_of: - - MissingDataDocumentation + - PreprocessingStrategy range: string multivalued: true - handling_strategy: - name: handling_strategy - description: 'Strategy used to handle missing data (e.g., deletion, imputation, - flagging, multiple imputation). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:handlingStrategy - alias: handling_strategy - owner: MissingDataDocumentation - domain_of: - - MissingDataDocumentation - range: string id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: MissingDataDocumentation + owner: PreprocessingStrategy domain_of: - DatasetCollection - Dataset @@ -14807,7 +17464,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: MissingDataDocumentation + owner: PreprocessingStrategy domain_of: - DatasetCollection - Dataset @@ -14890,7 +17547,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: MissingDataDocumentation + owner: PreprocessingStrategy domain_of: - DatasetCollection - Dataset @@ -14973,7 +17630,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: MissingDataDocumentation + owner: PreprocessingStrategy domain_of: - DatasetProperty - Purpose @@ -15043,69 +17700,28 @@ classes: multivalued: true inlined: true inlined_as_list: true - RawDataSource: - name: RawDataSource - description: 'Description of raw data sources before preprocessing, cleaning, - or labeling. Documents where the original data comes from and how it can be - accessed. + CleaningStrategy: + name: CleaningStrategy + description: 'Was any cleaning of the data done (e.g., removal of instances, processing + of missing values)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataCollectionRawData + - rai:dataManipulationProtocol is_a: DatasetProperty attributes: - source_description: - name: source_description - description: 'Detailed description of where raw data comes from (e.g., sensors, - databases, web APIs, manual collection). + cleaning_details: + name: cleaning_details + description: 'Details on data cleaning procedures applied. 
' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling slot_uri: dcterms:description - alias: source_description - owner: RawDataSource - domain_of: - - RawDataSource - range: string - required: true - source_type: - name: source_type - description: 'Type of raw source (sensor, database, user input, web scraping, - etc.). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: dcterms:type - alias: source_type - owner: RawDataSource - domain_of: - - RawDataSource - range: string - multivalued: true - access_details: - name: access_details - description: 'Information on how to access or retrieve the raw source data. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:accessDetails - alias: access_details - owner: RawDataSource - domain_of: - - RawDataSource - range: string - raw_data_format: - name: raw_data_format - description: 'Format of the raw data before any preprocessing. 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/collection - slot_uri: d4d:rawDataFormat - alias: raw_data_format - owner: RawDataSource + alias: cleaning_details + owner: CleaningStrategy domain_of: - - RawDataSource + - CleaningStrategy range: string multivalued: true id: @@ -15114,7 +17730,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: RawDataSource + owner: CleaningStrategy domain_of: - DatasetCollection - Dataset @@ -15197,7 +17813,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: RawDataSource + owner: CleaningStrategy domain_of: - DatasetCollection - Dataset @@ -15280,7 +17896,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: RawDataSource + owner: CleaningStrategy domain_of: - DatasetCollection - Dataset @@ -15363,7 +17979,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: RawDataSource + owner: CleaningStrategy domain_of: - DatasetProperty - Purpose @@ -15433,28 +18049,93 @@ classes: multivalued: true inlined: true inlined_as_list: true - PreprocessingStrategy: - name: PreprocessingStrategy - description: 'Was any preprocessing of the data done (e.g., discretization or - bucketing, tokenization, SIFT feature extraction)? + LabelingStrategy: + name: LabelingStrategy + description: 'Was any labeling of the data done (e.g., part-of-speech tagging)? + This class documents the annotation process and quality metrics. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataPreprocessingProtocol is_a: DatasetProperty attributes: - preprocessing_details: - name: preprocessing_details - description: 'Details on preprocessing steps applied to the data. 
+ data_annotation_platform: + name: data_annotation_platform + description: Platform or tool used for annotation (e.g., Label Studio, Prodigy, + Amazon Mechanical Turk, custom annotation tool). + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + exact_mappings: + - rai:dataAnnotationPlatform + slot_uri: schema:instrument + alias: data_annotation_platform + owner: LabelingStrategy + domain_of: + - LabelingStrategy + range: string + data_annotation_protocol: + name: data_annotation_protocol + description: Annotation methodology, tasks, and protocols followed during + labeling. Includes annotation guidelines, quality control procedures, and + task definitions. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + exact_mappings: + - rai:dataAnnotationProtocol + slot_uri: d4d:dataAnnotationProtocol + alias: data_annotation_protocol + owner: LabelingStrategy + domain_of: + - LabelingStrategy + range: string + multivalued: true + annotations_per_item: + name: annotations_per_item + description: Number of annotations collected per data item. Multiple annotations + per item enable calculation of inter-annotator agreement. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + exact_mappings: + - rai:annotationsPerItem + slot_uri: d4d:annotationsPerItem + alias: annotations_per_item + owner: LabelingStrategy + domain_of: + - LabelingStrategy + range: integer + inter_annotator_agreement: + name: inter_annotator_agreement + description: Measure of agreement between annotators (e.g., Cohen's kappa, + Fleiss' kappa, Krippendorff's alpha, percent agreement). Include both the + metric name and value. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: schema:measurementMethod + alias: inter_annotator_agreement + owner: LabelingStrategy + domain_of: + - LabelingStrategy + range: string + annotator_demographics: + name: annotator_demographics + description: Demographic information about annotators, if available and relevant + (e.g., geographic location, language background, expertise level). + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + exact_mappings: + - rai:annotatorDemographics + slot_uri: d4d:annotatorDemographics + alias: annotator_demographics + owner: LabelingStrategy + domain_of: + - LabelingStrategy + range: string + multivalued: true + labeling_details: + name: labeling_details + description: 'Details on labeling/annotation procedures and quality metrics. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling slot_uri: dcterms:description - alias: preprocessing_details - owner: PreprocessingStrategy + alias: labeling_details + owner: LabelingStrategy domain_of: - - PreprocessingStrategy + - LabelingStrategy range: string multivalued: true id: @@ -15463,7 +18144,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: PreprocessingStrategy + owner: LabelingStrategy domain_of: - DatasetCollection - Dataset @@ -15546,7 +18227,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: PreprocessingStrategy + owner: LabelingStrategy domain_of: - DatasetCollection - Dataset @@ -15629,7 +18310,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: PreprocessingStrategy + owner: LabelingStrategy domain_of: - DatasetCollection - Dataset @@ -15712,7 +18393,7 @@ classes: from_schema: 
https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: PreprocessingStrategy + owner: LabelingStrategy domain_of: - DatasetProperty - Purpose @@ -15782,28 +18463,36 @@ classes: multivalued: true inlined: true inlined_as_list: true - CleaningStrategy: - name: CleaningStrategy - description: 'Was any cleaning of the data done (e.g., removal of instances, processing - of missing values)? + RawData: + name: RawData + description: 'Was the "raw" data saved in addition to the preprocessed/cleaned/labeled + data? If so, please provide a link or other access point to the "raw" data. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataManipulationProtocol is_a: DatasetProperty attributes: - cleaning_details: - name: cleaning_details - description: 'Details on data cleaning procedures applied. + access_url: + name: access_url + description: URL or access point for the raw data. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: dcat:accessURL + alias: access_url + owner: RawData + domain_of: + - RawData + range: uri + raw_data_details: + name: raw_data_details + description: 'Details on raw data availability and access procedures. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling slot_uri: dcterms:description - alias: cleaning_details - owner: CleaningStrategy + alias: raw_data_details + owner: RawData domain_of: - - CleaningStrategy + - RawData range: string multivalued: true id: @@ -15812,7 +18501,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: CleaningStrategy + owner: RawData domain_of: - DatasetCollection - Dataset @@ -15895,7 +18584,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: CleaningStrategy + owner: RawData domain_of: - DatasetCollection - Dataset @@ -15978,7 +18667,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: CleaningStrategy + owner: RawData domain_of: - DatasetCollection - Dataset @@ -16061,7 +18750,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: CleaningStrategy + owner: RawData domain_of: - DatasetProperty - Purpose @@ -16131,93 +18820,68 @@ classes: multivalued: true inlined: true inlined_as_list: true - LabelingStrategy: - name: LabelingStrategy - description: 'Was any labeling of the data done (e.g., part-of-speech tagging)? - This class documents the annotation process and quality metrics. + ImputationProtocol: + name: ImputationProtocol + description: 'Description of data imputation methodology, including techniques + used to handle missing values and rationale for chosen approaches. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataImputationProtocol is_a: DatasetProperty attributes: - data_annotation_platform: - name: data_annotation_platform - description: Platform or tool used for annotation (e.g., Label Studio, Prodigy, - Amazon Mechanical Turk, custom annotation tool). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - exact_mappings: - - rai:dataAnnotationPlatform - slot_uri: schema:instrument - alias: data_annotation_platform - owner: LabelingStrategy - domain_of: - - LabelingStrategy - range: string - data_annotation_protocol: - name: data_annotation_protocol - description: Annotation methodology, tasks, and protocols followed during - labeling. Includes annotation guidelines, quality control procedures, and - task definitions. + imputation_method: + name: imputation_method + description: 'Specific imputation technique used (mean, median, mode, forward + fill, backward fill, interpolation, model-based imputation, etc.). + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - exact_mappings: - - rai:dataAnnotationProtocol - slot_uri: d4d:dataAnnotationProtocol - alias: data_annotation_protocol - owner: LabelingStrategy + slot_uri: d4d:imputation_method + alias: imputation_method + owner: ImputationProtocol domain_of: - - LabelingStrategy + - ImputationProtocol range: string multivalued: true - annotations_per_item: - name: annotations_per_item - description: Number of annotations collected per data item. Multiple annotations - per item enable calculation of inter-annotator agreement. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - exact_mappings: - - rai:annotationsPerItem - slot_uri: d4d:annotationsPerItem - alias: annotations_per_item - owner: LabelingStrategy - domain_of: - - LabelingStrategy - range: integer - inter_annotator_agreement: - name: inter_annotator_agreement - description: Measure of agreement between annotators (e.g., Cohen's kappa, - Fleiss' kappa, Krippendorff's alpha, percent agreement). Include both the - metric name and value. + imputed_fields: + name: imputed_fields + description: 'Fields or columns where imputation was applied. + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: schema:measurementMethod - alias: inter_annotator_agreement - owner: LabelingStrategy + slot_uri: d4d:imputed_fields + alias: imputed_fields + owner: ImputationProtocol domain_of: - - LabelingStrategy + - ImputationProtocol range: string - annotator_demographics: - name: annotator_demographics - description: Demographic information about annotators, if available and relevant - (e.g., geographic location, language background, expertise level). + multivalued: true + imputation_rationale: + name: imputation_rationale + description: 'Justification for the imputation approach chosen, including + assumptions made about missing data mechanisms. + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - exact_mappings: - - rai:annotatorDemographics - slot_uri: d4d:annotatorDemographics - alias: annotator_demographics - owner: LabelingStrategy + slot_uri: d4d:imputation_rationale + alias: imputation_rationale + owner: ImputationProtocol domain_of: - - LabelingStrategy + - ImputationProtocol range: string - multivalued: true - labeling_details: - name: labeling_details - description: 'Details on labeling/annotation procedures and quality metrics. 
+ imputation_validation: + name: imputation_validation + description: 'Methods used to validate imputation quality (if any). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: dcterms:description - alias: labeling_details - owner: LabelingStrategy + slot_uri: d4d:imputation_validation + alias: imputation_validation + owner: ImputationProtocol domain_of: - - LabelingStrategy + - ImputationProtocol range: string multivalued: true id: @@ -16226,7 +18890,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: LabelingStrategy + owner: ImputationProtocol domain_of: - DatasetCollection - Dataset @@ -16309,7 +18973,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: LabelingStrategy + owner: ImputationProtocol domain_of: - DatasetCollection - Dataset @@ -16392,7 +19056,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: LabelingStrategy + owner: ImputationProtocol domain_of: - DatasetCollection - Dataset @@ -16475,7 +19139,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: LabelingStrategy + owner: ImputationProtocol domain_of: - DatasetProperty - Purpose @@ -16545,36 +19209,80 @@ classes: multivalued: true inlined: true inlined_as_list: true - RawData: - name: RawData - description: 'Was the "raw" data saved in addition to the preprocessed/cleaned/labeled - data? If so, please provide a link or other access point to the "raw" data. + AnnotationAnalysis: + name: AnnotationAnalysis + description: 'Analysis of annotation quality, inter-annotator agreement metrics, + and systematic patterns in annotation disagreements. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataAnnotationAnalysis is_a: DatasetProperty attributes: - access_url: - name: access_url - description: URL or access point for the raw data. + inter_annotator_agreement_score: + name: inter_annotator_agreement_score + description: 'Measured agreement between annotators (e.g., Cohen''s kappa + value, Fleiss'' kappa, Krippendorff''s alpha). + + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: dcat:accessURL - alias: access_url - owner: RawData + slot_uri: d4d:interAnnotatorAgreementScore + alias: inter_annotator_agreement_score + owner: AnnotationAnalysis domain_of: - - RawData - range: uri - raw_data_details: - name: raw_data_details - description: 'Details on raw data availability and access procedures. + - AnnotationAnalysis + range: float + agreement_metric: + name: agreement_metric + description: 'Type of agreement metric used (Cohen''s kappa, Fleiss'' kappa, + Krippendorff''s alpha, percentage agreement, etc.). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: dcterms:description - alias: raw_data_details - owner: RawData + slot_uri: d4d:agreementMetric + alias: agreement_metric + owner: AnnotationAnalysis domain_of: - - RawData + - AnnotationAnalysis + range: string + analysis_method: + name: analysis_method + description: 'Methodology used to assess annotation quality and resolve disagreements. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: d4d:analysisMethod + alias: analysis_method + owner: AnnotationAnalysis + domain_of: + - AnnotationAnalysis + range: string + disagreement_patterns: + name: disagreement_patterns + description: 'Systematic patterns in annotator disagreements (e.g., by demographic + group, annotation difficulty, task type). 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: d4d:disagreementPatterns + alias: disagreement_patterns + owner: AnnotationAnalysis + domain_of: + - AnnotationAnalysis + range: string + multivalued: true + annotation_quality_details: + name: annotation_quality_details + description: 'Additional details on annotation quality assessment and findings. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling + slot_uri: d4d:annotationQualityDetails + alias: annotation_quality_details + owner: AnnotationAnalysis + domain_of: + - AnnotationAnalysis range: string multivalued: true id: @@ -16583,7 +19291,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: RawData + owner: AnnotationAnalysis domain_of: - DatasetCollection - Dataset @@ -16666,7 +19374,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: RawData + owner: AnnotationAnalysis domain_of: - DatasetCollection - Dataset @@ -16749,7 +19457,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: RawData + owner: AnnotationAnalysis domain_of: - DatasetCollection - Dataset @@ -16832,7 +19540,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: RawData + owner: AnnotationAnalysis domain_of: - DatasetProperty - Purpose @@ -16902,68 +19610,62 @@ classes: multivalued: true inlined: true inlined_as_list: true - ImputationProtocol: - name: ImputationProtocol - description: 'Description of data imputation methodology, including techniques - used to handle missing values and rationale for chosen approaches. 
+ MachineAnnotationTools: + name: MachineAnnotationTools + description: 'Automated or machine-learning-based annotation tools used in dataset + creation, including NLP pipelines, computer vision models, or other automated + labeling systems. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataImputationProtocol + - rai:machineAnnotationTools is_a: DatasetProperty attributes: - imputation_method: - name: imputation_method - description: 'Specific imputation technique used (mean, median, mode, forward - fill, backward fill, interpolation, model-based imputation, etc.). + tools: + name: tools + description: 'List of automated annotation tools with their versions. Format + each entry as "ToolName version" (e.g., "spaCy 3.5.0", "NLTK 3.8", "GPT-4 + turbo"). Use "unknown" for version if not available (e.g., "Custom NER Model + unknown"). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:imputation_method - alias: imputation_method - owner: ImputationProtocol + slot_uri: schema:name + alias: tools + owner: MachineAnnotationTools domain_of: - - ImputationProtocol + - MachineAnnotationTools range: string multivalued: true - imputed_fields: - name: imputed_fields - description: 'Fields or columns where imputation was applied. + tool_descriptions: + name: tool_descriptions + description: 'Descriptions of what each tool does in the annotation process + and what types of annotations it produces. Should correspond to the tools + list. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:imputed_fields - alias: imputed_fields - owner: ImputationProtocol + slot_uri: d4d:toolDescriptions + alias: tool_descriptions + owner: MachineAnnotationTools domain_of: - - ImputationProtocol + - MachineAnnotationTools range: string multivalued: true - imputation_rationale: - name: imputation_rationale - description: 'Justification for the imputation approach chosen, including - assumptions made about missing data mechanisms. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:imputation_rationale - alias: imputation_rationale - owner: ImputationProtocol - domain_of: - - ImputationProtocol - range: string - imputation_validation: - name: imputation_validation - description: 'Methods used to validate imputation quality (if any). + tool_accuracy: + name: tool_accuracy + description: 'Known accuracy or performance metrics for the automated tools + (if available). Include metric name and value (e.g., "spaCy F1: 0.95", "GPT-4 + Accuracy: 92%"). 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:imputation_validation - alias: imputation_validation - owner: ImputationProtocol + slot_uri: d4d:toolAccuracy + alias: tool_accuracy + owner: MachineAnnotationTools domain_of: - - ImputationProtocol + - MachineAnnotationTools range: string multivalued: true id: @@ -16972,7 +19674,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ImputationProtocol + owner: MachineAnnotationTools domain_of: - DatasetCollection - Dataset @@ -17055,7 +19757,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ImputationProtocol + owner: MachineAnnotationTools domain_of: - DatasetCollection - Dataset @@ -17138,7 +19840,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ImputationProtocol + owner: MachineAnnotationTools domain_of: - DatasetCollection - Dataset @@ -17221,7 +19923,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ImputationProtocol + owner: MachineAnnotationTools domain_of: - DatasetProperty - Purpose @@ -17291,80 +19993,25 @@ classes: multivalued: true inlined: true inlined_as_list: true - AnnotationAnalysis: - name: AnnotationAnalysis - description: 'Analysis of annotation quality, inter-annotator agreement metrics, - and systematic patterns in annotation disagreements. + ExistingUse: + name: ExistingUse + description: 'Has the dataset been used for any tasks already? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataAnnotationAnalysis is_a: DatasetProperty attributes: - inter_annotator_agreement_score: - name: inter_annotator_agreement_score - description: 'Measured agreement between annotators (e.g., Cohen''s kappa - value, Fleiss'' kappa, Krippendorff''s alpha). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:interAnnotatorAgreementScore - alias: inter_annotator_agreement_score - owner: AnnotationAnalysis - domain_of: - - AnnotationAnalysis - range: float - agreement_metric: - name: agreement_metric - description: 'Type of agreement metric used (Cohen''s kappa, Fleiss'' kappa, - Krippendorff''s alpha, percentage agreement, etc.). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:agreementMetric - alias: agreement_metric - owner: AnnotationAnalysis - domain_of: - - AnnotationAnalysis - range: string - analysis_method: - name: analysis_method - description: 'Methodology used to assess annotation quality and resolve disagreements. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:analysisMethod - alias: analysis_method - owner: AnnotationAnalysis - domain_of: - - AnnotationAnalysis - range: string - disagreement_patterns: - name: disagreement_patterns - description: 'Systematic patterns in annotator disagreements (e.g., by demographic - group, annotation difficulty, task type). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:disagreementPatterns - alias: disagreement_patterns - owner: AnnotationAnalysis - domain_of: - - AnnotationAnalysis - range: string - multivalued: true - annotation_quality_details: - name: annotation_quality_details - description: 'Additional details on annotation quality assessment and findings. 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:annotationQualityDetails - alias: annotation_quality_details - owner: AnnotationAnalysis + examples: + name: examples + description: List of examples of known/previous uses of the dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses + slot_uri: schema:example + alias: examples + owner: ExistingUse domain_of: - - AnnotationAnalysis + - ExistingUse + - IntendedUse + - VariableMetadata range: string multivalued: true id: @@ -17373,7 +20020,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: AnnotationAnalysis + owner: ExistingUse domain_of: - DatasetCollection - Dataset @@ -17456,7 +20103,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: AnnotationAnalysis + owner: ExistingUse domain_of: - DatasetCollection - Dataset @@ -17539,7 +20186,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: AnnotationAnalysis + owner: ExistingUse domain_of: - DatasetCollection - Dataset @@ -17622,7 +20269,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: AnnotationAnalysis + owner: ExistingUse domain_of: - DatasetProperty - Purpose @@ -17692,62 +20339,35 @@ classes: multivalued: true inlined: true inlined_as_list: true - MachineAnnotationTools: - name: MachineAnnotationTools - description: 'Automated or machine-learning-based annotation tools used in dataset - creation, including NLP pipelines, computer vision models, or other automated - labeling systems. + UseRepository: + name: UseRepository + description: 'Is there a repository that links to any or all papers or systems + that use the dataset? If so, provide a link or other access point. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:machineAnnotationTools is_a: DatasetProperty attributes: - tools: - name: tools - description: 'List of automated annotation tools with their versions. Format - each entry as "ToolName version" (e.g., "spaCy 3.5.0", "NLTK 3.8", "GPT-4 - turbo"). Use "unknown" for version if not available (e.g., "Custom NER Model - unknown"). - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: schema:name - alias: tools - owner: MachineAnnotationTools - domain_of: - - MachineAnnotationTools - range: string - multivalued: true - tool_descriptions: - name: tool_descriptions - description: 'Descriptions of what each tool does in the annotation process - and what types of annotations it produces. Should correspond to the tools - list. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:toolDescriptions - alias: tool_descriptions - owner: MachineAnnotationTools + repository_url: + name: repository_url + description: URL to a repository of known dataset uses. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses + alias: repository_url + owner: UseRepository domain_of: - - MachineAnnotationTools - range: string - multivalued: true - tool_accuracy: - name: tool_accuracy - description: 'Known accuracy or performance metrics for the automated tools - (if available). Include metric name and value (e.g., "spaCy F1: 0.95", "GPT-4 - Accuracy: 92%"). + - UseRepository + range: uri + repository_details: + name: repository_details + description: 'Details on the repository of known dataset uses. 
' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/preprocessing-cleaning-labeling - slot_uri: d4d:toolAccuracy - alias: tool_accuracy - owner: MachineAnnotationTools + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses + slot_uri: dcterms:description + alias: repository_details + owner: UseRepository domain_of: - - MachineAnnotationTools + - UseRepository range: string multivalued: true id: @@ -17756,7 +20376,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: MachineAnnotationTools + owner: UseRepository domain_of: - DatasetCollection - Dataset @@ -17839,7 +20459,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: MachineAnnotationTools + owner: UseRepository domain_of: - DatasetCollection - Dataset @@ -17922,7 +20542,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: MachineAnnotationTools + owner: UseRepository domain_of: - DatasetCollection - Dataset @@ -18005,7 +20625,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: MachineAnnotationTools + owner: UseRepository domain_of: - DatasetProperty - Purpose @@ -18075,25 +20695,25 @@ classes: multivalued: true inlined: true inlined_as_list: true - ExistingUse: - name: ExistingUse - description: 'Has the dataset been used for any tasks already? + OtherTask: + name: OtherTask + description: 'What other tasks could the dataset be used for? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - examples: - name: examples - description: List of examples of known/previous uses of the dataset. + task_details: + name: task_details + description: 'Details on other potential tasks the dataset could be used for. 
+ + ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - slot_uri: schema:example - alias: examples - owner: ExistingUse + slot_uri: dcterms:description + alias: task_details + owner: OtherTask domain_of: - - ExistingUse - - IntendedUse - - VariableMetadata + - OtherTask range: string multivalued: true id: @@ -18102,7 +20722,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ExistingUse + owner: OtherTask domain_of: - DatasetCollection - Dataset @@ -18185,7 +20805,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ExistingUse + owner: OtherTask domain_of: - DatasetCollection - Dataset @@ -18268,7 +20888,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ExistingUse + owner: OtherTask domain_of: - DatasetCollection - Dataset @@ -18351,7 +20971,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ExistingUse + owner: OtherTask domain_of: - DatasetProperty - Purpose @@ -18421,35 +21041,31 @@ classes: multivalued: true inlined: true inlined_as_list: true - UseRepository: - name: UseRepository - description: 'Is there a repository that links to any or all papers or systems - that use the dataset? If so, provide a link or other access point. + FutureUseImpact: + name: FutureUseImpact + description: 'Is there anything about the dataset''s composition or collection + that might impact future uses or create risks/harm (e.g., unfair treatment, + legal or financial risks)? If so, describe these impacts and any mitigation + strategies. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataSocialImpact is_a: DatasetProperty attributes: - repository_url: - name: repository_url - description: URL to a repository of known dataset uses. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - alias: repository_url - owner: UseRepository - domain_of: - - UseRepository - range: uri - repository_details: - name: repository_details - description: 'Details on the repository of known dataset uses. + impact_details: + name: impact_details + description: 'Details on potential impacts, risks, and mitigation strategies. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses slot_uri: dcterms:description - alias: repository_details - owner: UseRepository + alias: impact_details + owner: FutureUseImpact domain_of: - - UseRepository + - FutureUseImpact + - DataProtectionImpact range: string multivalued: true id: @@ -18458,7 +21074,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: UseRepository + owner: FutureUseImpact domain_of: - DatasetCollection - Dataset @@ -18541,7 +21157,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: UseRepository + owner: FutureUseImpact domain_of: - DatasetCollection - Dataset @@ -18624,7 +21240,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: UseRepository + owner: FutureUseImpact domain_of: - DatasetCollection - Dataset @@ -18707,7 +21323,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: UseRepository + owner: FutureUseImpact domain_of: - DatasetProperty - Purpose @@ -18777,25 +21393,25 @@ classes: multivalued: true inlined: true inlined_as_list: true - OtherTask: - name: OtherTask - description: 'What other tasks 
could the dataset be used for? + DiscouragedUse: + name: DiscouragedUse + description: 'Are there tasks for which the dataset should not be used? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - task_details: - name: task_details - description: 'Details on other potential tasks the dataset could be used for. + discouragement_details: + name: discouragement_details + description: 'Details on tasks for which the dataset should not be used. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses slot_uri: dcterms:description - alias: task_details - owner: OtherTask + alias: discouragement_details + owner: DiscouragedUse domain_of: - - OtherTask + - DiscouragedUse range: string multivalued: true id: @@ -18804,7 +21420,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: OtherTask + owner: DiscouragedUse domain_of: - DatasetCollection - Dataset @@ -18887,7 +21503,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: OtherTask + owner: DiscouragedUse domain_of: - DatasetCollection - Dataset @@ -18970,7 +21586,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: OtherTask + owner: DiscouragedUse domain_of: - DatasetCollection - Dataset @@ -19053,7 +21669,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: OtherTask + owner: DiscouragedUse domain_of: - DatasetProperty - Purpose @@ -19123,31 +21739,49 @@ classes: multivalued: true inlined: true inlined_as_list: true - FutureUseImpact: - name: FutureUseImpact - description: 'Is there anything about the dataset''s composition or collection - that might impact future uses or create risks/harm (e.g., unfair treatment, - legal or financial risks)? 
If so, describe these impacts and any mitigation - strategies. + IntendedUse: + name: IntendedUse + description: 'Explicit statement of intended uses for this dataset. Complements + FutureUseImpact by focusing on positive, recommended applications rather than + risks. Aligns with RO-Crate "Intended Use" field. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - rai:dataSocialImpact + - rai:dataUseCases is_a: DatasetProperty attributes: - impact_details: - name: impact_details - description: 'Details on potential impacts, risks, and mitigation strategies. - - ' + examples: + name: examples + description: List of example intended uses for this dataset. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - slot_uri: dcterms:description - alias: impact_details - owner: FutureUseImpact + alias: examples + owner: IntendedUse domain_of: - - FutureUseImpact - - DataProtectionImpact + - ExistingUse + - IntendedUse + - VariableMetadata + range: string + multivalued: true + usage_notes: + name: usage_notes + description: Notes or caveats about using the dataset for intended purposes. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses + alias: usage_notes + owner: IntendedUse + domain_of: + - IntendedUse + range: string + use_category: + name: use_category + description: Category of intended use (e.g., research, clinical, educational, + commercial, policy). 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses + slot_uri: d4d:useCategory + alias: use_category + owner: IntendedUse + domain_of: + - IntendedUse range: string multivalued: true id: @@ -19156,7 +21790,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: FutureUseImpact + owner: IntendedUse domain_of: - DatasetCollection - Dataset @@ -19239,7 +21873,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: FutureUseImpact + owner: IntendedUse domain_of: - DatasetCollection - Dataset @@ -19322,7 +21956,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: FutureUseImpact + owner: IntendedUse domain_of: - DatasetCollection - Dataset @@ -19405,7 +22039,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: FutureUseImpact + owner: IntendedUse domain_of: - DatasetProperty - Purpose @@ -19475,25 +22109,26 @@ classes: multivalued: true inlined: true inlined_as_list: true - DiscouragedUse: - name: DiscouragedUse - description: 'Are there tasks for which the dataset should not be used? + ProhibitedUse: + name: ProhibitedUse + description: 'Explicit statement of prohibited or forbidden uses for this dataset. + Stronger than DiscouragedUse - these are uses that are explicitly not permitted + by license, ethics, or policy. Aligns with RO-Crate "Prohibited Uses" field. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - discouragement_details: - name: discouragement_details - description: 'Details on tasks for which the dataset should not be used. 
- - ' + prohibition_reason: + name: prohibition_reason + description: Reason why this use is prohibited (e.g., license restriction, + ethical concern, privacy risk, legal constraint). from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - slot_uri: dcterms:description - alias: discouragement_details - owner: DiscouragedUse + slot_uri: d4d:prohibitionReason + alias: prohibition_reason + owner: ProhibitedUse domain_of: - - DiscouragedUse + - ProhibitedUse range: string multivalued: true id: @@ -19502,7 +22137,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DiscouragedUse + owner: ProhibitedUse domain_of: - DatasetCollection - Dataset @@ -19585,7 +22220,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DiscouragedUse + owner: ProhibitedUse domain_of: - DatasetCollection - Dataset @@ -19668,7 +22303,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DiscouragedUse + owner: ProhibitedUse domain_of: - DatasetCollection - Dataset @@ -19751,7 +22386,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DiscouragedUse + owner: ProhibitedUse domain_of: - DatasetProperty - Purpose @@ -19821,58 +22456,36 @@ classes: multivalued: true inlined: true inlined_as_list: true - IntendedUse: - name: IntendedUse - description: 'Explicit statement of intended uses for this dataset. Complements - FutureUseImpact by focusing on positive, recommended applications rather than - risks. Aligns with RO-Crate "Intended Use" field. + ThirdPartySharing: + name: ThirdPartySharing + description: 'Will the dataset be distributed to third parties outside of the + entity (e.g., company, institution, organization) on behalf of which the dataset + was created? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataUseCases is_a: DatasetProperty attributes: - examples: - name: examples - description: List of example intended uses for this dataset. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - alias: examples - owner: IntendedUse - domain_of: - - ExistingUse - - IntendedUse - - VariableMetadata - range: string - multivalued: true - usage_notes: - name: usage_notes - description: Notes or caveats about using the dataset for intended purposes. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - alias: usage_notes - owner: IntendedUse - domain_of: - - IntendedUse - range: string - use_category: - name: use_category - description: Category of intended use (e.g., research, clinical, educational, - commercial, policy). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - slot_uri: d4d:useCategory - alias: use_category - owner: IntendedUse + is_shared: + name: is_shared + description: 'Boolean indicating whether the dataset is distributed to parties + external to the dataset-creating entity. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/distribution + slot_uri: dcterms:accessRights + alias: is_shared + owner: ThirdPartySharing domain_of: - - IntendedUse - range: string - multivalued: true + - ThirdPartySharing + range: boolean id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: IntendedUse + owner: ThirdPartySharing domain_of: - DatasetCollection - Dataset @@ -19955,7 +22568,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: IntendedUse + owner: ThirdPartySharing domain_of: - DatasetCollection - Dataset @@ -20038,7 +22651,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: IntendedUse + owner: ThirdPartySharing domain_of: - DatasetCollection - Dataset @@ -20121,7 +22734,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: IntendedUse + owner: ThirdPartySharing domain_of: - DatasetProperty - Purpose @@ -20191,26 +22804,24 @@ classes: multivalued: true inlined: true inlined_as_list: true - ProhibitedUse: - name: ProhibitedUse - description: 'Explicit statement of prohibited or forbidden uses for this dataset. - Stronger than DiscouragedUse - these are uses that are explicitly not permitted - by license, ethics, or policy. Aligns with RO-Crate "Prohibited Uses" field. + DistributionFormat: + name: DistributionFormat + description: 'How will the dataset be distributed (e.g., tarball on a website, + API, GitHub)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - prohibition_reason: - name: prohibition_reason - description: Reason why this use is prohibited (e.g., license restriction, - ethical concern, privacy risk, legal constraint). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/uses - slot_uri: d4d:prohibitionReason - alias: prohibition_reason - owner: ProhibitedUse + access_urls: + name: access_urls + description: Details of the distribution channel(s) or format(s). 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/distribution + slot_uri: dcat:accessURL + alias: access_urls + owner: DistributionFormat domain_of: - - ProhibitedUse + - DistributionFormat range: string multivalued: true id: @@ -20219,7 +22830,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ProhibitedUse + owner: DistributionFormat domain_of: - DatasetCollection - Dataset @@ -20302,7 +22913,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ProhibitedUse + owner: DistributionFormat domain_of: - DatasetCollection - Dataset @@ -20385,7 +22996,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ProhibitedUse + owner: DistributionFormat domain_of: - DatasetCollection - Dataset @@ -20468,7 +23079,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ProhibitedUse + owner: DistributionFormat domain_of: - DatasetProperty - Purpose @@ -20538,36 +23149,35 @@ classes: multivalued: true inlined: true inlined_as_list: true - ThirdPartySharing: - name: ThirdPartySharing - description: 'Will the dataset be distributed to third parties outside of the - entity (e.g., company, institution, organization) on behalf of which the dataset - was created? + DistributionDate: + name: DistributionDate + description: 'When will the dataset be distributed? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - is_shared: - name: is_shared - description: 'Boolean indicating whether the dataset is distributed to parties - external to the dataset-creating entity. + release_dates: + name: release_dates + description: 'Dates or timeframe for dataset release. Could be a one-time + release date or multiple scheduled releases. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/distribution - slot_uri: dcterms:accessRights - alias: is_shared - owner: ThirdPartySharing + slot_uri: dcterms:available + alias: release_dates + owner: DistributionDate domain_of: - - ThirdPartySharing - range: boolean + - DistributionDate + range: string + multivalued: true id: name: id description: An optional identifier for this property. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ThirdPartySharing + owner: DistributionDate domain_of: - DatasetCollection - Dataset @@ -20650,7 +23260,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ThirdPartySharing + owner: DistributionDate domain_of: - DatasetCollection - Dataset @@ -20733,7 +23343,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ThirdPartySharing + owner: DistributionDate domain_of: - DatasetCollection - Dataset @@ -20816,7 +23426,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ThirdPartySharing + owner: DistributionDate domain_of: - DatasetProperty - Purpose @@ -20886,24 +23496,38 @@ classes: multivalued: true inlined: true inlined_as_list: true - DistributionFormat: - name: DistributionFormat - description: 'How will the dataset be distributed (e.g., tarball on a website, - API, GitHub)? + Maintainer: + name: Maintainer + description: 'Who will be supporting/hosting/maintaining the dataset? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - access_urls: - name: access_urls - description: Details of the distribution channel(s) or format(s). 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/distribution - slot_uri: dcat:accessURL - alias: access_urls - owner: DistributionFormat + role: + name: role + description: 'Role of the maintainer (e.g., researcher, platform, organization). + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + slot_uri: schema:maintainer + alias: role + owner: Maintainer domain_of: - - DistributionFormat + - DataCollector + - Maintainer + range: CreatorOrMaintainerEnum + maintainer_details: + name: maintainer_details + description: 'Details on who will support, host, or maintain the dataset. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + slot_uri: dcterms:description + alias: maintainer_details + owner: Maintainer + domain_of: + - Maintainer range: string multivalued: true id: @@ -20912,7 +23536,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DistributionFormat + owner: Maintainer domain_of: - DatasetCollection - Dataset @@ -20995,7 +23619,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DistributionFormat + owner: Maintainer domain_of: - DatasetCollection - Dataset @@ -21078,7 +23702,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DistributionFormat + owner: Maintainer domain_of: - DatasetCollection - Dataset @@ -21161,7 +23785,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DistributionFormat + owner: Maintainer domain_of: - DatasetProperty - Purpose @@ -21231,26 +23855,36 @@ classes: multivalued: true inlined: true inlined_as_list: true - DistributionDate: - name: DistributionDate - description: 'When will the dataset be distributed? 
+ Erratum: + name: Erratum + description: 'Is there an erratum? If so, please provide a link or other access + point. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - release_dates: - name: release_dates - description: 'Dates or timeframe for dataset release. Could be a one-time - release date or multiple scheduled releases. + erratum_url: + name: erratum_url + description: URL or access point for the erratum. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + slot_uri: dcat:accessURL + alias: erratum_url + owner: Erratum + domain_of: + - Erratum + range: uri + erratum_details: + name: erratum_details + description: 'Details on any errata or corrections to the dataset. ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/distribution - slot_uri: dcterms:available - alias: release_dates - owner: DistributionDate + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + slot_uri: dcterms:description + alias: erratum_details + owner: Erratum domain_of: - - DistributionDate + - Erratum range: string multivalued: true id: @@ -21259,7 +23893,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DistributionDate + owner: Erratum domain_of: - DatasetCollection - Dataset @@ -21342,7 +23976,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DistributionDate + owner: Erratum domain_of: - DatasetCollection - Dataset @@ -21425,7 +24059,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DistributionDate + owner: Erratum domain_of: - DatasetCollection - Dataset @@ -21508,7 +24142,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DistributionDate + owner: Erratum domain_of: - 
DatasetProperty - Purpose @@ -21578,38 +24212,40 @@ classes: multivalued: true inlined: true inlined_as_list: true - Maintainer: - name: Maintainer - description: 'Who will be supporting/hosting/maintaining the dataset? + UpdatePlan: + name: UpdatePlan + description: 'Will the dataset be updated (e.g., to correct labeling errors, add + new instances, delete instances)? If so, how often, by whom, and how will these + updates be communicated? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - rai:dataReleaseMaintenancePlan is_a: DatasetProperty attributes: - role: - name: role - description: 'Role of the maintainer (e.g., researcher, platform, organization). - - ' + frequency: + name: frequency + description: How often updates are planned (e.g., quarterly, annually). from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: schema:maintainer - alias: role - owner: Maintainer + slot_uri: d4d:frequency + alias: frequency + owner: UpdatePlan domain_of: - - DataCollector - - Maintainer - range: CreatorOrMaintainerEnum - maintainer_details: - name: maintainer_details - description: 'Details on who will support, host, or maintain the dataset. + - UpdatePlan + range: string + update_details: + name: update_details + description: 'Details on update plans, responsible parties, and communication + methods. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance slot_uri: dcterms:description - alias: maintainer_details - owner: Maintainer + alias: update_details + owner: UpdatePlan domain_of: - - Maintainer + - UpdatePlan range: string multivalued: true id: @@ -21618,7 +24254,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Maintainer + owner: UpdatePlan domain_of: - DatasetCollection - Dataset @@ -21701,7 +24337,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Maintainer + owner: UpdatePlan domain_of: - DatasetCollection - Dataset @@ -21784,7 +24420,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Maintainer + owner: UpdatePlan domain_of: - DatasetCollection - Dataset @@ -21867,7 +24503,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Maintainer + owner: UpdatePlan domain_of: - DatasetProperty - Purpose @@ -21937,36 +24573,38 @@ classes: multivalued: true inlined: true inlined_as_list: true - Erratum: - name: Erratum - description: 'Is there an erratum? If so, please provide a link or other access - point. + RetentionLimits: + name: RetentionLimits + description: 'If the dataset relates to people, are there applicable limits on + the retention of their data (e.g., were individuals told their data would be + deleted after a certain time)? If so, please describe these limits and how they + will be enforced. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - erratum_url: - name: erratum_url - description: URL or access point for the erratum. + retention_period: + name: retention_period + description: Time period for data retention. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: dcat:accessURL - alias: erratum_url - owner: Erratum + slot_uri: d4d:retentionPeriod + alias: retention_period + owner: RetentionLimits domain_of: - - Erratum - range: uri - erratum_details: - name: erratum_details - description: 'Details on any errata or corrections to the dataset. + - RetentionLimits + range: string + retention_details: + name: retention_details + description: 'Details on data retention limits and enforcement procedures. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance slot_uri: dcterms:description - alias: erratum_details - owner: Erratum + alias: retention_details + owner: RetentionLimits domain_of: - - Erratum + - RetentionLimits range: string multivalued: true id: @@ -21975,7 +24613,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: Erratum + owner: RetentionLimits domain_of: - DatasetCollection - Dataset @@ -22058,7 +24696,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: Erratum + owner: RetentionLimits domain_of: - DatasetCollection - Dataset @@ -22141,7 +24779,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: Erratum + owner: RetentionLimits domain_of: - DatasetCollection - Dataset @@ -22224,7 +24862,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: Erratum + owner: RetentionLimits domain_of: - DatasetProperty - Purpose @@ -22294,40 +24932,47 @@ classes: multivalued: true inlined: true inlined_as_list: true - UpdatePlan: - name: UpdatePlan - description: 'Will the dataset be updated (e.g., to correct labeling errors, add - new instances, delete instances)? 
If so, how often, by whom, and how will these - updates be communicated? + VersionAccess: + name: VersionAccess + description: 'Will older versions of the dataset continue to be supported/hosted/maintained? + If so, how? If not, how will obsolescence be communicated to dataset consumers? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - rai:dataReleaseMaintenancePlan is_a: DatasetProperty attributes: - frequency: - name: frequency - description: How often updates are planned (e.g., quarterly, annually). + latest_version_doi: + name: latest_version_doi + description: DOI or URL of the latest dataset version. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: d4d:frequency - alias: frequency - owner: UpdatePlan + slot_uri: schema:identifier + alias: latest_version_doi + owner: VersionAccess + domain_of: + - VersionAccess + range: string + versions_available: + name: versions_available + description: List of available versions with metadata. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + slot_uri: d4d:versionsAvailable + alias: versions_available + owner: VersionAccess domain_of: - - UpdatePlan + - VersionAccess range: string - update_details: - name: update_details - description: 'Details on update plans, responsible parties, and communication - methods. + multivalued: true + version_details: + name: version_details + description: 'Details on version support policies and obsolescence communication. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance slot_uri: dcterms:description - alias: update_details - owner: UpdatePlan + alias: version_details + owner: VersionAccess domain_of: - - UpdatePlan + - VersionAccess range: string multivalued: true id: @@ -22336,7 +24981,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: UpdatePlan + owner: VersionAccess domain_of: - DatasetCollection - Dataset @@ -22419,7 +25064,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: UpdatePlan + owner: VersionAccess domain_of: - DatasetCollection - Dataset @@ -22502,7 +25147,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: UpdatePlan + owner: VersionAccess domain_of: - DatasetCollection - Dataset @@ -22585,7 +25230,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: UpdatePlan + owner: VersionAccess domain_of: - DatasetProperty - Purpose @@ -22655,38 +25300,38 @@ classes: multivalued: true inlined: true inlined_as_list: true - RetentionLimits: - name: RetentionLimits - description: 'If the dataset relates to people, are there applicable limits on - the retention of their data (e.g., were individuals told their data would be - deleted after a certain time)? If so, please describe these limits and how they - will be enforced. + ExtensionMechanism: + name: ExtensionMechanism + description: 'If others want to extend/augment/build on/contribute to the dataset, + is there a mechanism for them to do so? If so, please describe how those contributions + are validated and communicated. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - retention_period: - name: retention_period - description: Time period for data retention. + contribution_url: + name: contribution_url + description: URL for contribution guidelines or process. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: d4d:retentionPeriod - alias: retention_period - owner: RetentionLimits + slot_uri: dcat:landingPage + alias: contribution_url + owner: ExtensionMechanism domain_of: - - RetentionLimits - range: string - retention_details: - name: retention_details - description: 'Details on data retention limits and enforcement procedures. + - ExtensionMechanism + range: uri + extension_details: + name: extension_details + description: 'Details on extension mechanisms, contribution validation, and + communication. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance slot_uri: dcterms:description - alias: retention_details - owner: RetentionLimits + alias: extension_details + owner: ExtensionMechanism domain_of: - - RetentionLimits + - ExtensionMechanism range: string multivalued: true id: @@ -22695,7 +25340,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: RetentionLimits + owner: ExtensionMechanism domain_of: - DatasetCollection - Dataset @@ -22778,7 +25423,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: RetentionLimits + owner: ExtensionMechanism domain_of: - DatasetCollection - Dataset @@ -22861,7 +25506,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: RetentionLimits + owner: ExtensionMechanism domain_of: - DatasetCollection - Dataset @@ -22944,7 +25589,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware 
alias: used_software - owner: RetentionLimits + owner: ExtensionMechanism domain_of: - DatasetProperty - Purpose @@ -23014,47 +25659,57 @@ classes: multivalued: true inlined: true inlined_as_list: true - VersionAccess: - name: VersionAccess - description: 'Will older versions of the dataset continue to be supported/hosted/maintained? - If so, how? If not, how will obsolescence be communicated to dataset consumers? + EthicalReview: + name: EthicalReview + description: 'Were any ethical or compliance review processes conducted (e.g., + by an institutional review board)? If so, please provide a description of these + review processes, including the frequency of review and documentation of outcomes, + as well as a link or other access point to any supporting documentation. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - latest_version_doi: - name: latest_version_doi - description: DOI or URL of the latest dataset version. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: schema:identifier - alias: latest_version_doi - owner: VersionAccess + contact_person: + name: contact_person + description: Contact person for questions about ethical review. Provides structured + contact information including name, email, affiliation, and optional ORCID. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics + exact_mappings: + - schema:contactPoint + slot_uri: schema:contactPoint + alias: contact_person + owner: EthicalReview domain_of: - - VersionAccess - range: string - versions_available: - name: versions_available - description: List of available versions with metadata. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: d4d:versionsAvailable - alias: versions_available - owner: VersionAccess + - EthicalReview + - LicenseAndUseTerms + range: Person + reviewing_organization: + name: reviewing_organization + description: Organization that conducted the ethical review (e.g., Institutional + Review Board, Ethics Committee, Research Ethics Board). Provides information + about the body responsible for ethical oversight. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics + exact_mappings: + - schema:provider + slot_uri: schema:provider + alias: reviewing_organization + owner: EthicalReview domain_of: - - VersionAccess - range: string - multivalued: true - version_details: - name: version_details - description: 'Details on version support policies and obsolescence communication. + - EthicalReview + range: Organization + review_details: + name: review_details + description: 'Details on ethical review processes, outcomes, and supporting + documentation. 
' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics slot_uri: dcterms:description - alias: version_details - owner: VersionAccess + alias: review_details + owner: EthicalReview domain_of: - - VersionAccess + - EthicalReview range: string multivalued: true id: @@ -23063,7 +25718,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: VersionAccess + owner: EthicalReview domain_of: - DatasetCollection - Dataset @@ -23146,7 +25801,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: VersionAccess + owner: EthicalReview domain_of: - DatasetCollection - Dataset @@ -23229,7 +25884,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: VersionAccess + owner: EthicalReview domain_of: - DatasetCollection - Dataset @@ -23312,7 +25967,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: VersionAccess + owner: EthicalReview domain_of: - DatasetProperty - Purpose @@ -23382,38 +26037,29 @@ classes: multivalued: true inlined: true inlined_as_list: true - ExtensionMechanism: - name: ExtensionMechanism - description: 'If others want to extend/augment/build on/contribute to the dataset, - is there a mechanism for them to do so? If so, please describe how those contributions - are validated and communicated. + DataProtectionImpact: + name: DataProtectionImpact + description: 'Has an analysis of the potential impact of the dataset and its use + on data subjects (e.g., a data protection impact analysis) been conducted? If + so, please provide a description of this analysis, including the outcomes, and + any supporting documentation. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - contribution_url: - name: contribution_url - description: URL for contribution guidelines or process. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance - slot_uri: dcat:landingPage - alias: contribution_url - owner: ExtensionMechanism - domain_of: - - ExtensionMechanism - range: uri - extension_details: - name: extension_details - description: 'Details on extension mechanisms, contribution validation, and - communication. + impact_details: + name: impact_details + description: 'Details on data protection impact analysis, outcomes, and documentation. ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/maintenance + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics slot_uri: dcterms:description - alias: extension_details - owner: ExtensionMechanism + alias: impact_details + owner: DataProtectionImpact domain_of: - - ExtensionMechanism + - FutureUseImpact + - DataProtectionImpact range: string multivalued: true id: @@ -23422,7 +26068,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ExtensionMechanism + owner: DataProtectionImpact domain_of: - DatasetCollection - Dataset @@ -23505,7 +26151,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ExtensionMechanism + owner: DataProtectionImpact domain_of: - DatasetCollection - Dataset @@ -23588,7 +26234,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ExtensionMechanism + owner: DataProtectionImpact domain_of: - DatasetCollection - Dataset @@ -23671,7 +26317,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ExtensionMechanism + owner: DataProtectionImpact domain_of: - 
DatasetProperty - Purpose @@ -23723,75 +26369,45 @@ classes: - UpdatePlan - RetentionLimits - VersionAccess - - ExtensionMechanism - - EthicalReview - - DataProtectionImpact - - CollectionNotification - - CollectionConsent - - ConsentRevocation - - HumanSubjectResearch - - InformedConsent - - ParticipantPrivacy - - HumanSubjectCompensation - - AtRiskPopulations - - LicenseAndUseTerms - - IPRestrictions - - ExportControlRegulatoryRestrictions - range: Software - multivalued: true - inlined: true - inlined_as_list: true - EthicalReview: - name: EthicalReview - description: 'Were any ethical or compliance review processes conducted (e.g., - by an institutional review board)? If so, please provide a description of these - review processes, including the frequency of review and documentation of outcomes, - as well as a link or other access point to any supporting documentation. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: DatasetProperty - attributes: - contact_person: - name: contact_person - description: Contact person for questions about ethical review. Provides structured - contact information including name, email, affiliation, and optional ORCID. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics - exact_mappings: - - schema:contactPoint - slot_uri: schema:contactPoint - alias: contact_person - owner: EthicalReview - domain_of: - - EthicalReview - - LicenseAndUseTerms - range: Person - reviewing_organization: - name: reviewing_organization - description: Organization that conducted the ethical review (e.g., Institutional - Review Board, Ethics Committee, Research Ethics Board). Provides information - about the body responsible for ethical oversight. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics - exact_mappings: - - schema:provider - slot_uri: schema:provider - alias: reviewing_organization - owner: EthicalReview - domain_of: - - EthicalReview - range: Organization - review_details: - name: review_details - description: 'Details on ethical review processes, outcomes, and supporting - documentation. + - ExtensionMechanism + - EthicalReview + - DataProtectionImpact + - CollectionNotification + - CollectionConsent + - ConsentRevocation + - HumanSubjectResearch + - InformedConsent + - ParticipantPrivacy + - HumanSubjectCompensation + - AtRiskPopulations + - LicenseAndUseTerms + - IPRestrictions + - ExportControlRegulatoryRestrictions + range: Software + multivalued: true + inlined: true + inlined_as_list: true + CollectionNotification: + name: CollectionNotification + description: 'Were the individuals in question notified about the data collection? + If so, please describe (or show with screenshots, etc.) how notice was provided, + and reproduce the language of the notification itself if possible. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + is_a: DatasetProperty + attributes: + notification_details: + name: notification_details + description: 'Details on how individuals were notified about data collection. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics slot_uri: dcterms:description - alias: review_details - owner: EthicalReview + alias: notification_details + owner: CollectionNotification domain_of: - - EthicalReview + - CollectionNotification range: string multivalued: true id: @@ -23800,7 +26416,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: EthicalReview + owner: CollectionNotification domain_of: - DatasetCollection - Dataset @@ -23883,7 +26499,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: EthicalReview + owner: CollectionNotification domain_of: - DatasetCollection - Dataset @@ -23966,7 +26582,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: EthicalReview + owner: CollectionNotification domain_of: - DatasetCollection - Dataset @@ -24049,7 +26665,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: EthicalReview + owner: CollectionNotification domain_of: - DatasetProperty - Purpose @@ -24119,29 +26735,27 @@ classes: multivalued: true inlined: true inlined_as_list: true - DataProtectionImpact: - name: DataProtectionImpact - description: 'Has an analysis of the potential impact of the dataset and its use - on data subjects (e.g., a data protection impact analysis) been conducted? If - so, please provide a description of this analysis, including the outcomes, and - any supporting documentation. + CollectionConsent: + name: CollectionConsent + description: 'Did the individuals in question consent to the collection and use + of their data? If so, how was consent requested and provided, and what language + did individuals consent to? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - impact_details: - name: impact_details - description: 'Details on data protection impact analysis, outcomes, and documentation. + consent_details: + name: consent_details + description: 'Details on how consent was requested, provided, and documented. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics slot_uri: dcterms:description - alias: impact_details - owner: DataProtectionImpact + alias: consent_details + owner: CollectionConsent domain_of: - - FutureUseImpact - - DataProtectionImpact + - CollectionConsent range: string multivalued: true id: @@ -24150,7 +26764,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: DataProtectionImpact + owner: CollectionConsent domain_of: - DatasetCollection - Dataset @@ -24233,7 +26847,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: DataProtectionImpact + owner: CollectionConsent domain_of: - DatasetCollection - Dataset @@ -24316,7 +26930,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: DataProtectionImpact + owner: CollectionConsent domain_of: - DatasetCollection - Dataset @@ -24399,7 +27013,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: DataProtectionImpact + owner: CollectionConsent domain_of: - DatasetProperty - Purpose @@ -24469,27 +27083,27 @@ classes: multivalued: true inlined: true inlined_as_list: true - CollectionNotification: - name: CollectionNotification - description: 'Were the individuals in question notified about the data collection? - If so, please describe (or show with screenshots, etc.) how notice was provided, - and reproduce the language of the notification itself if possible. 
+ ConsentRevocation: + name: ConsentRevocation + description: 'If consent was obtained, were the consenting individuals provided + with a mechanism to revoke their consent in the future or for certain uses? + If so, please describe. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - notification_details: - name: notification_details - description: 'Details on how individuals were notified about data collection. + revocation_details: + name: revocation_details + description: 'Details on consent revocation mechanisms and procedures. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics slot_uri: dcterms:description - alias: notification_details - owner: CollectionNotification + alias: revocation_details + owner: ConsentRevocation domain_of: - - CollectionNotification + - ConsentRevocation range: string multivalued: true id: @@ -24498,7 +27112,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: CollectionNotification + owner: ConsentRevocation domain_of: - DatasetCollection - Dataset @@ -24581,7 +27195,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: CollectionNotification + owner: ConsentRevocation domain_of: - DatasetCollection - Dataset @@ -24664,7 +27278,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: CollectionNotification + owner: ConsentRevocation domain_of: - DatasetCollection - Dataset @@ -24747,7 +27361,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: CollectionNotification + owner: ConsentRevocation domain_of: - DatasetProperty - Purpose @@ -24817,27 +27431,79 @@ classes: multivalued: true inlined: true inlined_as_list: true - CollectionConsent: - name: CollectionConsent - description: 
'Did the individuals in question consent to the collection and use - of their data? If so, how was consent requested and provided, and what language - did individuals consent to? + HumanSubjectResearch: + name: HumanSubjectResearch + description: 'Information about whether the dataset involves human subjects research + and what regulatory or ethical review processes were followed. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - consent_details: - name: consent_details - description: 'Details on how consent was requested, provided, and documented. + involves_human_subjects: + name: involves_human_subjects + description: Does this dataset involve human subjects research? + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:involvesHumanSubjects + alias: involves_human_subjects + owner: HumanSubjectResearch + domain_of: + - HumanSubjectResearch + range: boolean + irb_approval: + name: irb_approval + description: 'Was Institutional Review Board (IRB) approval obtained? Include + approval number and institution if applicable. ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics - slot_uri: dcterms:description - alias: consent_details - owner: CollectionConsent + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:irbApproval + alias: irb_approval + owner: HumanSubjectResearch domain_of: - - CollectionConsent + - HumanSubjectResearch + range: string + multivalued: true + ethics_review_board: + name: ethics_review_board + description: 'What ethics review board(s) reviewed this research? Include + institution names and approval details. 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:ethicsReviewBoard + alias: ethics_review_board + owner: HumanSubjectResearch + domain_of: + - HumanSubjectResearch + range: string + multivalued: true + special_populations: + name: special_populations + description: 'Does the research involve any special populations that require + additional protections (e.g., minors, pregnant women, prisoners)? + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:specialPopulations + alias: special_populations + owner: HumanSubjectResearch + domain_of: + - HumanSubjectResearch + range: string + multivalued: true + regulatory_compliance: + name: regulatory_compliance + description: 'What regulatory frameworks govern this human subjects research + (e.g., 45 CFR 46, HIPAA)? + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:regulatoryCompliance + alias: regulatory_compliance + owner: HumanSubjectResearch + domain_of: + - HumanSubjectResearch range: string multivalued: true id: @@ -24846,7 +27512,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: CollectionConsent + owner: HumanSubjectResearch domain_of: - DatasetCollection - Dataset @@ -24929,7 +27595,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: CollectionConsent + owner: HumanSubjectResearch domain_of: - DatasetCollection - Dataset @@ -25012,7 +27678,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: CollectionConsent + owner: HumanSubjectResearch domain_of: - DatasetCollection - Dataset @@ -25095,7 +27761,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: CollectionConsent + owner: HumanSubjectResearch 
domain_of: - DatasetProperty - Purpose @@ -25165,27 +27831,79 @@ classes: multivalued: true inlined: true inlined_as_list: true - ConsentRevocation: - name: ConsentRevocation - description: 'If consent was obtained, were the consenting individuals provided - with a mechanism to revoke their consent in the future or for certain uses? - If so, please describe. + InformedConsent: + name: InformedConsent + description: 'Details about informed consent procedures used in human subjects + research. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - revocation_details: - name: revocation_details - description: 'Details on consent revocation mechanisms and procedures. + consent_obtained: + name: consent_obtained + description: Was informed consent obtained from all participants? + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:consentObtained + alias: consent_obtained + owner: InformedConsent + domain_of: + - InformedConsent + range: boolean + consent_type: + name: consent_type + description: 'What type of consent was obtained (e.g., written, verbal, electronic, + implied through participation)? + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:consentType + alias: consent_type + owner: InformedConsent + domain_of: + - InformedConsent + range: string + multivalued: true + consent_documentation: + name: consent_documentation + description: 'How is consent documented? Include references to consent forms + or procedures used. + + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:consentDocumentation + alias: consent_documentation + owner: InformedConsent + domain_of: + - InformedConsent + range: string + multivalued: true + withdrawal_mechanism: + name: withdrawal_mechanism + description: 'How can participants withdraw their consent? What procedures + are in place for data deletion upon withdrawal? 
+ + ' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:withdrawalMechanism + alias: withdrawal_mechanism + owner: InformedConsent + domain_of: + - InformedConsent + range: string + multivalued: true + consent_scope: + name: consent_scope + description: 'What specific uses did participants consent to? Are there limitations + on data use based on consent? ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/ethics - slot_uri: dcterms:description - alias: revocation_details - owner: ConsentRevocation + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human + slot_uri: d4d:consentScope + alias: consent_scope + owner: InformedConsent domain_of: - - ConsentRevocation + - InformedConsent range: string multivalued: true id: @@ -25194,7 +27912,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ConsentRevocation + owner: InformedConsent domain_of: - DatasetCollection - Dataset @@ -25277,7 +27995,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ConsentRevocation + owner: InformedConsent domain_of: - DatasetCollection - Dataset @@ -25360,7 +28078,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ConsentRevocation + owner: InformedConsent domain_of: - DatasetCollection - Dataset @@ -25443,7 +28161,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ConsentRevocation + owner: InformedConsent domain_of: - DatasetProperty - Purpose @@ -25513,79 +28231,69 @@ classes: multivalued: true inlined: true inlined_as_list: true - HumanSubjectResearch: - name: HumanSubjectResearch - description: 'Information about whether the dataset involves human subjects research - and what regulatory or ethical review processes were followed. 
+ ParticipantPrivacy: + name: ParticipantPrivacy + description: 'Information about privacy protections and anonymization procedures + for human research participants. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - involves_human_subjects: - name: involves_human_subjects - description: Does this dataset involve human subjects research? - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:involvesHumanSubjects - alias: involves_human_subjects - owner: HumanSubjectResearch - domain_of: - - HumanSubjectResearch - range: boolean - irb_approval: - name: irb_approval - description: 'Was Institutional Review Board (IRB) approval obtained? Include - approval number and institution if applicable. + anonymization_method: + name: anonymization_method + description: 'What methods were used to anonymize or de-identify participant + data? Include technical details of privacy-preserving techniques. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:irbApproval - alias: irb_approval - owner: HumanSubjectResearch + slot_uri: d4d:anonymizationMethod + alias: anonymization_method + owner: ParticipantPrivacy domain_of: - - HumanSubjectResearch + - ParticipantPrivacy range: string multivalued: true - ethics_review_board: - name: ethics_review_board - description: 'What ethics review board(s) reviewed this research? Include - institution names and approval details. + reidentification_risk: + name: reidentification_risk + description: 'What is the assessed risk of re-identification? What measures + were taken to minimize this risk? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:ethicsReviewBoard - alias: ethics_review_board - owner: HumanSubjectResearch + slot_uri: d4d:reidentificationRisk + alias: reidentification_risk + owner: ParticipantPrivacy domain_of: - - HumanSubjectResearch + - ParticipantPrivacy range: string multivalued: true - special_populations: - name: special_populations - description: 'Does the research involve any special populations that require - additional protections (e.g., minors, pregnant women, prisoners)? + privacy_techniques: + name: privacy_techniques + description: 'What privacy-preserving techniques were applied (e.g., differential + privacy, k-anonymity, data masking)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:specialPopulations - alias: special_populations - owner: HumanSubjectResearch + slot_uri: d4d:privacyTechniques + alias: privacy_techniques + owner: ParticipantPrivacy domain_of: - - HumanSubjectResearch + - ParticipantPrivacy range: string multivalued: true - regulatory_compliance: - name: regulatory_compliance - description: 'What regulatory frameworks govern this human subjects research - (e.g., 45 CFR 46, HIPAA)? + data_linkage: + name: data_linkage + description: 'Can this dataset be linked to other datasets in ways that might + compromise participant privacy? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:regulatoryCompliance - alias: regulatory_compliance - owner: HumanSubjectResearch + slot_uri: d4d:dataLinkage + alias: data_linkage + owner: ParticipantPrivacy domain_of: - - HumanSubjectResearch + - ParticipantPrivacy range: string multivalued: true id: @@ -25594,7 +28302,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: HumanSubjectResearch + owner: ParticipantPrivacy domain_of: - DatasetCollection - Dataset @@ -25677,7 +28385,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: HumanSubjectResearch + owner: ParticipantPrivacy domain_of: - DatasetCollection - Dataset @@ -25760,7 +28468,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: HumanSubjectResearch + owner: ParticipantPrivacy domain_of: - DatasetCollection - Dataset @@ -25843,7 +28551,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: HumanSubjectResearch + owner: ParticipantPrivacy domain_of: - DatasetProperty - Purpose @@ -25913,79 +28621,65 @@ classes: multivalued: true inlined: true inlined_as_list: true - InformedConsent: - name: InformedConsent - description: 'Details about informed consent procedures used in human subjects - research. + HumanSubjectCompensation: + name: HumanSubjectCompensation + description: 'Information about compensation or incentives provided to human research + participants. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - consent_obtained: - name: consent_obtained - description: Was informed consent obtained from all participants? 
+ compensation_provided: + name: compensation_provided + description: Were participants compensated for their participation? from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:consentObtained - alias: consent_obtained - owner: InformedConsent + slot_uri: d4d:compensationProvided + alias: compensation_provided + owner: HumanSubjectCompensation domain_of: - - InformedConsent + - HumanSubjectCompensation range: boolean - consent_type: - name: consent_type - description: 'What type of consent was obtained (e.g., written, verbal, electronic, - implied through participation)? - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:consentType - alias: consent_type - owner: InformedConsent - domain_of: - - InformedConsent - range: string - multivalued: true - consent_documentation: - name: consent_documentation - description: 'How is consent documented? Include references to consent forms - or procedures used. + compensation_type: + name: compensation_type + description: 'What type of compensation was provided (e.g., monetary payment, + gift cards, course credit, other incentives)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:consentDocumentation - alias: consent_documentation - owner: InformedConsent + slot_uri: d4d:compensationType + alias: compensation_type + owner: HumanSubjectCompensation domain_of: - - InformedConsent + - HumanSubjectCompensation range: string multivalued: true - withdrawal_mechanism: - name: withdrawal_mechanism - description: 'How can participants withdraw their consent? What procedures - are in place for data deletion upon withdrawal? + compensation_amount: + name: compensation_amount + description: 'What was the amount or value of compensation provided? Include + currency or equivalent value. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:withdrawalMechanism - alias: withdrawal_mechanism - owner: InformedConsent + slot_uri: d4d:compensationAmount + alias: compensation_amount + owner: HumanSubjectCompensation domain_of: - - InformedConsent + - HumanSubjectCompensation range: string multivalued: true - consent_scope: - name: consent_scope - description: 'What specific uses did participants consent to? Are there limitations - on data use based on consent? + compensation_rationale: + name: compensation_rationale + description: 'What was the rationale for the compensation structure? How was + the amount determined to be appropriate? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:consentScope - alias: consent_scope - owner: InformedConsent + slot_uri: d4d:compensationRationale + alias: compensation_rationale + owner: HumanSubjectCompensation domain_of: - - InformedConsent + - HumanSubjectCompensation range: string multivalued: true id: @@ -25994,7 +28688,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: InformedConsent + owner: HumanSubjectCompensation domain_of: - DatasetCollection - Dataset @@ -26077,7 +28771,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: InformedConsent + owner: HumanSubjectCompensation domain_of: - DatasetCollection - Dataset @@ -26160,7 +28854,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: InformedConsent + owner: HumanSubjectCompensation domain_of: - DatasetCollection - Dataset @@ -26243,7 +28937,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: InformedConsent + owner: HumanSubjectCompensation domain_of: - DatasetProperty - Purpose @@ -26313,69 
+29007,68 @@ classes: multivalued: true inlined: true inlined_as_list: true - ParticipantPrivacy: - name: ParticipantPrivacy - description: 'Information about privacy protections and anonymization procedures - for human research participants. + AtRiskPopulations: + name: AtRiskPopulations + description: 'Information about protections for at-risk populations in human subjects + research. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - anonymization_method: - name: anonymization_method - description: 'What methods were used to anonymize or de-identify participant - data? Include technical details of privacy-preserving techniques. + at_risk_groups_included: + name: at_risk_groups_included + description: 'Are any at-risk populations included (e.g., children, pregnant + women, prisoners, cognitively impaired individuals)? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:anonymizationMethod - alias: anonymization_method - owner: ParticipantPrivacy + slot_uri: d4d:atRiskGroupsIncluded + alias: at_risk_groups_included + owner: AtRiskPopulations domain_of: - - ParticipantPrivacy - range: string - multivalued: true - reidentification_risk: - name: reidentification_risk - description: 'What is the assessed risk of re-identification? What measures - were taken to minimize this risk? + - AtRiskPopulations + range: boolean + special_protections: + name: special_protections + description: 'What additional protections were implemented for at-risk populations? + Include safeguards, modified procedures, or additional oversight. 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:reidentificationRisk - alias: reidentification_risk - owner: ParticipantPrivacy + slot_uri: d4d:specialProtections + alias: special_protections + owner: AtRiskPopulations domain_of: - - ParticipantPrivacy + - AtRiskPopulations range: string multivalued: true - privacy_techniques: - name: privacy_techniques - description: 'What privacy-preserving techniques were applied (e.g., differential - privacy, k-anonymity, data masking)? + assent_procedures: + name: assent_procedures + description: 'For research involving minors, what assent procedures were used? + How was developmentally appropriate assent obtained? ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:privacyTechniques - alias: privacy_techniques - owner: ParticipantPrivacy + slot_uri: d4d:assentProcedures + alias: assent_procedures + owner: AtRiskPopulations domain_of: - - ParticipantPrivacy + - AtRiskPopulations range: string multivalued: true - data_linkage: - name: data_linkage - description: 'Can this dataset be linked to other datasets in ways that might - compromise participant privacy? + guardian_consent: + name: guardian_consent + description: 'For participants unable to provide their own consent, how was + guardian or surrogate consent obtained? 
' from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:dataLinkage - alias: data_linkage - owner: ParticipantPrivacy + slot_uri: d4d:guardianConsent + alias: guardian_consent + owner: AtRiskPopulations domain_of: - - ParticipantPrivacy + - AtRiskPopulations range: string multivalued: true id: @@ -26384,7 +29077,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: ParticipantPrivacy + owner: AtRiskPopulations domain_of: - DatasetCollection - Dataset @@ -26467,7 +29160,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ParticipantPrivacy + owner: AtRiskPopulations domain_of: - DatasetCollection - Dataset @@ -26550,7 +29243,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ParticipantPrivacy + owner: AtRiskPopulations domain_of: - DatasetCollection - Dataset @@ -26633,7 +29326,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: ParticipantPrivacy + owner: AtRiskPopulations domain_of: - DatasetProperty - Purpose @@ -26703,74 +29396,69 @@ classes: multivalued: true inlined: true inlined_as_list: true - HumanSubjectCompensation: - name: HumanSubjectCompensation - description: 'Information about compensation or incentives provided to human research - participants. + LicenseAndUseTerms: + name: LicenseAndUseTerms + description: 'Will the dataset be distributed under a copyright or other IP license, + and/or under applicable terms of use? Provide a link or copy of relevant licensing + terms and any fees. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - compensation_provided: - name: compensation_provided - description: Were participants compensated for their participation? 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:compensationProvided - alias: compensation_provided - owner: HumanSubjectCompensation - domain_of: - - HumanSubjectCompensation - range: boolean - compensation_type: - name: compensation_type - description: 'What type of compensation was provided (e.g., monetary payment, - gift cards, course credit, other incentives)? + license_terms: + name: license_terms + description: 'Description of the dataset''s license and terms of use (including + links, costs, or usage constraints). ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:compensationType - alias: compensation_type - owner: HumanSubjectCompensation + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + slot_uri: dcterms:license + alias: license_terms + owner: LicenseAndUseTerms domain_of: - - HumanSubjectCompensation + - LicenseAndUseTerms range: string multivalued: true - compensation_amount: - name: compensation_amount - description: 'What was the amount or value of compensation provided? Include - currency or equivalent value. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:compensationAmount - alias: compensation_amount - owner: HumanSubjectCompensation + data_use_permission: + name: data_use_permission + description: Structured data use permissions using the Data Use Ontology (DUO). + Specifies permitted uses (e.g., general research, health/medical research, + disease-specific research) and restrictions (e.g., non-commercial use, ethics + approval required, collaboration required). 
See https://github.com/EBISPOT/DUO + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + exact_mappings: + - DUO:0000001 + slot_uri: DUO:0000001 + alias: data_use_permission + owner: LicenseAndUseTerms domain_of: - - HumanSubjectCompensation - range: string + - LicenseAndUseTerms + range: DataUsePermissionEnum multivalued: true - compensation_rationale: - name: compensation_rationale - description: 'What was the rationale for the compensation structure? How was - the amount determined to be appropriate? - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:compensationRationale - alias: compensation_rationale - owner: HumanSubjectCompensation + contact_person: + name: contact_person + description: Contact person for licensing questions. Provides structured contact + information including name, email, affiliation, and optional ORCID. This + person can answer questions about licensing terms, usage restrictions, fees, + and permissions. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + exact_mappings: + - schema:contactPoint + slot_uri: schema:contactPoint + alias: contact_person + owner: LicenseAndUseTerms domain_of: - - HumanSubjectCompensation - range: string - multivalued: true + - EthicalReview + - LicenseAndUseTerms + range: Person id: name: id description: An optional identifier for this property. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: HumanSubjectCompensation + owner: LicenseAndUseTerms domain_of: - DatasetCollection - Dataset @@ -26853,7 +29541,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: HumanSubjectCompensation + owner: LicenseAndUseTerms domain_of: - DatasetCollection - Dataset @@ -26936,7 +29624,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: HumanSubjectCompensation + owner: LicenseAndUseTerms domain_of: - DatasetCollection - Dataset @@ -27019,7 +29707,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: HumanSubjectCompensation + owner: LicenseAndUseTerms domain_of: - DatasetProperty - Purpose @@ -27089,68 +29777,30 @@ classes: multivalued: true inlined: true inlined_as_list: true - AtRiskPopulations: - name: AtRiskPopulations - description: 'Information about protections for at-risk populations in human subjects - research. + IPRestrictions: + name: IPRestrictions + description: 'Have any third parties imposed IP-based or other restrictions on + the data associated with the instances? If so, describe them and note any relevant + fees or licensing terms. Maps to DUO terms related to commercial/non-profit + use restrictions (NCU, NPU, NPUNCU). ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - at_risk_groups_included: - name: at_risk_groups_included - description: 'Are any at-risk populations included (e.g., children, pregnant - women, prisoners, cognitively impaired individuals)? 
- - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:atRiskGroupsIncluded - alias: at_risk_groups_included - owner: AtRiskPopulations - domain_of: - - AtRiskPopulations - range: boolean - special_protections: - name: special_protections - description: 'What additional protections were implemented for at-risk populations? - Include safeguards, modified procedures, or additional oversight. - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:specialProtections - alias: special_protections - owner: AtRiskPopulations - domain_of: - - AtRiskPopulations - range: string - multivalued: true - assent_procedures: - name: assent_procedures - description: 'For research involving minors, what assent procedures were used? - How was developmentally appropriate assent obtained? - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:assentProcedures - alias: assent_procedures - owner: AtRiskPopulations - domain_of: - - AtRiskPopulations - range: string - multivalued: true - guardian_consent: - name: guardian_consent - description: 'For participants unable to provide their own consent, how was - guardian or surrogate consent obtained? - - ' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/human - slot_uri: d4d:guardianConsent - alias: guardian_consent - owner: AtRiskPopulations + restrictions: + name: restrictions + description: Explanation of third-party IP restrictions. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + broad_mappings: + - DUO:0000046 + - DUO:0000045 + slot_uri: dcterms:rights + alias: restrictions + owner: IPRestrictions domain_of: - - AtRiskPopulations + - ExternalResource + - IPRestrictions range: string multivalued: true id: @@ -27159,7 +29809,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: AtRiskPopulations + owner: IPRestrictions domain_of: - DatasetCollection - Dataset @@ -27242,7 +29892,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: AtRiskPopulations + owner: IPRestrictions domain_of: - DatasetCollection - Dataset @@ -27325,7 +29975,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: AtRiskPopulations + owner: IPRestrictions domain_of: - DatasetCollection - Dataset @@ -27408,7 +30058,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: AtRiskPopulations + owner: IPRestrictions domain_of: - DatasetProperty - Purpose @@ -27478,61 +30128,83 @@ classes: multivalued: true inlined: true inlined_as_list: true - LicenseAndUseTerms: - name: LicenseAndUseTerms - description: 'Will the dataset be distributed under a copyright or other IP license, - and/or under applicable terms of use? Provide a link or copy of relevant licensing - terms and any fees. + ExportControlRegulatoryRestrictions: + name: ExportControlRegulatoryRestrictions + description: 'Do any export controls or other regulatory restrictions apply to + the dataset or to individual instances? Includes compliance tracking for regulations + like HIPAA and other US regulations. If so, please describe these restrictions + and provide a link or copy of any supporting documentation. 
Maps to DUO terms + related to ethics approval, geographic restrictions, and institutional requirements. ' from_schema: https://w3id.org/bridge2ai/data-sheets-schema is_a: DatasetProperty attributes: - license_terms: - name: license_terms - description: 'Description of the dataset''s license and terms of use (including - links, costs, or usage constraints). - - ' + regulatory_restrictions: + name: regulatory_restrictions + description: Export or regulatory restrictions on the dataset. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + broad_mappings: + - DUO:0000021 + - DUO:0000022 + - DUO:0000028 + slot_uri: dcterms:accessRights + alias: regulatory_restrictions + owner: ExportControlRegulatoryRestrictions + domain_of: + - Dataset + - DataSubset + - ExportControlRegulatoryRestrictions + range: string + multivalued: true + hipaa_compliant: + name: hipaa_compliant + description: Indicates compliance with the Health Insurance Portability and + Accountability Act (HIPAA). HIPAA applies to protected health information + in the United States. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + slot_uri: d4d:hipaaCompliant + alias: hipaa_compliant + owner: ExportControlRegulatoryRestrictions + domain_of: + - ExportControlRegulatoryRestrictions + range: ComplianceStatusEnum + other_compliance: + name: other_compliance + description: Other regulatory compliance frameworks applicable to this dataset + (e.g., CCPA, PIPEDA, industry-specific regulations). 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - slot_uri: dcterms:license - alias: license_terms - owner: LicenseAndUseTerms + slot_uri: d4d:otherCompliance + alias: other_compliance + owner: ExportControlRegulatoryRestrictions domain_of: - - LicenseAndUseTerms + - ExportControlRegulatoryRestrictions range: string multivalued: true - data_use_permission: - name: data_use_permission - description: Structured data use permissions using the Data Use Ontology (DUO). - Specifies permitted uses (e.g., general research, health/medical research, - disease-specific research) and restrictions (e.g., non-commercial use, ethics - approval required, collaboration required). See https://github.com/EBISPOT/DUO + confidentiality_level: + name: confidentiality_level + description: Confidentiality classification of the dataset indicating level + of access restrictions and sensitivity. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - exact_mappings: - - DUO:0000001 - slot_uri: DUO:0000001 - alias: data_use_permission - owner: LicenseAndUseTerms + slot_uri: d4d:confidentialityLevel + alias: confidentiality_level + owner: ExportControlRegulatoryRestrictions domain_of: - - LicenseAndUseTerms - range: DataUsePermissionEnum - multivalued: true - contact_person: - name: contact_person - description: Contact person for licensing questions. Provides structured contact - information including name, email, affiliation, and optional ORCID. This - person can answer questions about licensing terms, usage restrictions, fees, - and permissions. + - ExportControlRegulatoryRestrictions + range: ConfidentialityLevelEnum + governance_committee_contact: + name: governance_committee_contact + description: Contact person for data governance committee. This person can + answer questions about data governance policies, access procedures, and + oversight mechanisms. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance exact_mappings: - schema:contactPoint slot_uri: schema:contactPoint - alias: contact_person - owner: LicenseAndUseTerms + alias: governance_committee_contact + owner: ExportControlRegulatoryRestrictions domain_of: - - EthicalReview - - LicenseAndUseTerms + - ExportControlRegulatoryRestrictions range: Person id: name: id @@ -27540,7 +30212,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: LicenseAndUseTerms + owner: ExportControlRegulatoryRestrictions domain_of: - DatasetCollection - Dataset @@ -27623,7 +30295,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: LicenseAndUseTerms + owner: ExportControlRegulatoryRestrictions domain_of: - DatasetCollection - Dataset @@ -27706,7 +30378,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: LicenseAndUseTerms + owner: ExportControlRegulatoryRestrictions domain_of: - DatasetCollection - Dataset @@ -27789,7 +30461,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: LicenseAndUseTerms + owner: ExportControlRegulatoryRestrictions domain_of: - DatasetProperty - Purpose @@ -27859,30 +30531,181 @@ classes: multivalued: true inlined: true inlined_as_list: true - IPRestrictions: - name: IPRestrictions - description: 'Have any third parties imposed IP-based or other restrictions on - the data associated with the instances? If so, describe them and note any relevant - fees or licensing terms. Maps to DUO terms related to commercial/non-profit - use restrictions (NCU, NPU, NPUNCU). - - ' + VariableMetadata: + name: VariableMetadata + description: Metadata describing an individual variable, field, or column in a + dataset. 
Variables may represent measurements, observations, derived values, + or categorical attributes. from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:PropertyValue is_a: DatasetProperty attributes: - restrictions: - name: restrictions - description: Explanation of third-party IP restrictions. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - broad_mappings: - - DUO:0000046 - - DUO:0000045 - slot_uri: dcterms:rights - alias: restrictions - owner: IPRestrictions + variable_name: + name: variable_name + description: The name or identifier of the variable as it appears in the data + files. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + exact_mappings: + - schema:name + slot_uri: schema:name + alias: variable_name + owner: VariableMetadata domain_of: - - ExternalResource - - IPRestrictions + - VariableMetadata + range: string + required: true + data_type: + name: data_type + description: The data type of the variable (e.g., integer, float, string, + boolean, date, categorical). Use standard type names when possible. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + exact_mappings: + - schema:DataType + slot_uri: schema:DataType + alias: data_type + owner: VariableMetadata + domain_of: + - VariableMetadata + range: VariableTypeEnum + unit: + name: unit + description: 'The unit of measurement for the variable, preferably using QUDT + units (http://qudt.org/vocab/unit/). Examples: qudt:Kilogram, qudt:Meter, + qudt:DegreeCelsius.' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + exact_mappings: + - qudt:hasUnit + - schema:unitCode + slot_uri: qudt:unit + alias: unit + owner: VariableMetadata + domain_of: + - VariableMetadata + range: uriorcurie + missing_value_code: + name: missing_value_code + description: 'Code(s) used to represent missing values for this variable. + Examples: "NA", "-999", "null", "". Multiple codes may be specified.' 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: d4d:missingValueCode + alias: missing_value_code + owner: VariableMetadata + domain_of: + - VariableMetadata + range: string + multivalued: true + minimum_value: + name: minimum_value + description: The minimum value that the variable can take. Applicable to numeric + variables. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:minValue + alias: minimum_value + owner: VariableMetadata + domain_of: + - VariableMetadata + range: float + maximum_value: + name: maximum_value + description: The maximum value that the variable can take. Applicable to numeric + variables. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:maxValue + alias: maximum_value + owner: VariableMetadata + domain_of: + - VariableMetadata + range: float + categories: + name: categories + description: The permitted categories or values for a categorical variable. + Each entry should describe a possible value and its meaning. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:valueReference + alias: categories + owner: VariableMetadata + domain_of: + - VariableMetadata + range: string + multivalued: true + examples: + name: examples + description: Example values for this variable to illustrate typical data. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: skos:example + alias: examples + owner: VariableMetadata + domain_of: + - ExistingUse + - IntendedUse + - VariableMetadata + range: string + multivalued: true + is_identifier: + name: is_identifier + description: Indicates whether this variable serves as a unique identifier + or key for records in the dataset. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:identifier + alias: is_identifier + owner: VariableMetadata + domain_of: + - VariableMetadata + range: boolean + is_sensitive: + name: is_sensitive + description: Indicates whether this variable contains sensitive information + (e.g., personal data, protected health information). + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: d4d:isSensitive + alias: is_sensitive + owner: VariableMetadata + domain_of: + - VariableMetadata + range: boolean + precision: + name: precision + description: The precision or number of decimal places for numeric variables. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:valuePrecision + alias: precision + owner: VariableMetadata + domain_of: + - VariableMetadata + range: integer + measurement_technique: + name: measurement_technique + description: 'The technique or method used to measure this variable. Examples: + "mass spectrometry", "self-report survey", "GPS coordinates".' + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: schema:measurementTechnique + alias: measurement_technique + owner: VariableMetadata + domain_of: + - VariableMetadata + range: string + derivation: + name: derivation + description: Description of how this variable was derived or calculated from + other variables, if applicable. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: dcterms:provenance + alias: derivation + owner: VariableMetadata + domain_of: + - VariableMetadata + range: string + quality_notes: + name: quality_notes + description: Notes about data quality, reliability, or known issues specific + to this variable. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + slot_uri: dcterms:description + alias: quality_notes + owner: VariableMetadata + domain_of: + - VariableMetadata range: string multivalued: true id: @@ -27891,7 +30714,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier alias: id - owner: IPRestrictions + owner: VariableMetadata domain_of: - DatasetCollection - Dataset @@ -27974,7 +30797,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: IPRestrictions + owner: VariableMetadata domain_of: - DatasetCollection - Dataset @@ -28057,7 +30880,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: IPRestrictions + owner: VariableMetadata domain_of: - DatasetCollection - Dataset @@ -28140,7 +30963,7 @@ classes: from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: d4d:usedSoftware alias: used_software - owner: IPRestrictions + owner: VariableMetadata domain_of: - DatasetProperty - Purpose @@ -28210,98 +31033,430 @@ classes: multivalued: true inlined: true inlined_as_list: true - ExportControlRegulatoryRestrictions: - name: ExportControlRegulatoryRestrictions - description: 'Do any export controls or other regulatory restrictions apply to - the dataset or to individual instances? Includes compliance tracking for regulations - like HIPAA and other US regulations. If so, please describe these restrictions - and provide a link or copy of any supporting documentation. Maps to DUO terms - related to ethics approval, geographic restrictions, and institutional requirements. - - ' + class_uri: schema:PropertyValue + File: + name: File + description: A single file within a dataset or file collection. Represents an + individual data file, code file, documentation file, etc. Maps to RO-Crate File + entities. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema - is_a: DatasetProperty + aliases: + - data file + - file + - file object + exact_mappings: + - schema:DigitalDocument + is_a: Information + slots: + - bytes + - path + - format + - encoding + - compression + - media_type + - hash + - md5 + - sha256 + - dialect attributes: - regulatory_restrictions: - name: regulatory_restrictions - description: Export or regulatory restrictions on the dataset. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - broad_mappings: - - DUO:0000021 - - DUO:0000022 - - DUO:0000028 - slot_uri: dcterms:accessRights - alias: regulatory_restrictions - owner: ExportControlRegulatoryRestrictions + file_type: + name: file_type + description: Semantic type or purpose of this file (e.g., data_file, code_file, + documentation_file, metadata_file). + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/file-collection + slot_uri: d4d:fileType + alias: file_type + owner: File + domain_of: + - File + range: FileTypeEnum + bytes: + name: bytes + description: Size of the data in bytes. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:byteSize + alias: bytes + owner: File + domain_of: + - File + range: integer + path: + name: path + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: schema:contentUrl + alias: path + owner: File + domain_of: + - File + - FileCollection + range: string + format: + name: format + description: The file format, physical medium, or dimensions of a resource. + This should be a file extension or MIME type. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:format + alias: format + owner: File + domain_of: + - File + range: FormatEnum + encoding: + name: encoding + description: the character encoding of the data + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:mediaType + alias: encoding + owner: File + domain_of: + - File + range: EncodingEnum + compression: + name: compression + description: compression format used, if any. e.g., gzip, bzip2, zip + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:compressFormat + alias: compression + owner: File + domain_of: + - Information + - File + - FileCollection + - DatasetCollection + - Dataset + - DataSubset + range: CompressionEnum + media_type: + name: media_type + description: The media type of the data. This should be a MIME type. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:encodingFormat + slot_uri: dcat:mediaType + alias: media_type + owner: File + domain_of: + - File + range: MediaTypeEnum + hash: + name: hash + description: hash of the data + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:identifier + alias: hash + owner: File + domain_of: + - File + range: string + md5: + name: md5 + description: md5 hash of the data + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:identifier + alias: md5 + owner: File + domain_of: + - File + range: string + sha256: + name: sha256 + description: sha256 hash of the data + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:identifier + alias: sha256 + owner: File + domain_of: + - File + range: string + dialect: + name: dialect + description: Specific format dialect or variation (e.g., CSV dialect, JSON-LD + profile). 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: schema:encodingFormat + alias: dialect + owner: File + domain_of: + - File + range: string + conforms_to: + name: conforms_to + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + conforms_to_class: + name: conforms_to_class + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to_class + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + conforms_to_schema: + name: conforms_to_schema + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to_schema + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + created_by: + name: created_by + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:creator + alias: created_by + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + created_on: + name: created_on + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:created + alias: created_on + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + doi: + name: doi + description: digital object identifier + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:identifier + alias: doi + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + pattern: 10\.\d{4,}\/.+ + download_url: + name: download_url + description: URL from which the data can be downloaded. 
This is not the same + as the landing page, which is a page that describes the dataset. Rather, + this URL points directly to the data itself. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:url + slot_uri: dcat:downloadURL + alias: download_url + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: uri + issued: + name: issued + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:issued + alias: issued + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + keywords: + name: keywords + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:keyword + alias: keywords + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + multivalued: true + language: + name: language + description: language in which the information is expressed + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:inLanguage + slot_uri: dcterms:language + alias: language + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + last_updated_on: + name: last_updated_on + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:modified + alias: last_updated_on + owner: File domain_of: + - Information + - DatasetCollection - Dataset - DataSubset - - ExportControlRegulatoryRestrictions + - File + range: datetime + license: + name: license + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:license + alias: license + owner: File + domain_of: + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - multivalued: true - hipaa_compliant: - name: hipaa_compliant - description: Indicates compliance with the Health Insurance Portability and - 
Accountability Act (HIPAA). HIPAA applies to protected health information - in the United States. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - slot_uri: d4d:hipaaCompliant - alias: hipaa_compliant - owner: ExportControlRegulatoryRestrictions + modified_by: + name: modified_by + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:contributor + alias: modified_by + owner: File domain_of: - - ExportControlRegulatoryRestrictions - range: ComplianceStatusEnum - other_compliance: - name: other_compliance - description: Other regulatory compliance frameworks applicable to this dataset - (e.g., CCPA, PIPEDA, industry-specific regulations). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - slot_uri: d4d:otherCompliance - alias: other_compliance - owner: ExportControlRegulatoryRestrictions + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + page: + name: page + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:landingPage + alias: page + owner: File domain_of: - - ExportControlRegulatoryRestrictions + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - multivalued: true - confidentiality_level: - name: confidentiality_level - description: Confidentiality classification of the dataset indicating level - of access restrictions and sensitivity. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance - slot_uri: d4d:confidentialityLevel - alias: confidentiality_level - owner: ExportControlRegulatoryRestrictions + publisher: + name: publisher + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:publisher + alias: publisher + owner: File domain_of: - - ExportControlRegulatoryRestrictions - range: ConfidentialityLevelEnum - governance_committee_contact: - name: governance_committee_contact - description: Contact person for data governance committee. This person can - answer questions about data governance policies, access procedures, and - oversight mechanisms. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/data-governance + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: uriorcurie + status: + name: status + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:type + alias: status + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + title: + name: title + description: the official title of the element + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:title + alias: title + owner: File + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + version: + name: version + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:hasVersion + alias: version + owner: File + domain_of: + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + was_derived_from: + name: was_derived_from + from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - schema:contactPoint - slot_uri: schema:contactPoint - alias: governance_committee_contact - owner: ExportControlRegulatoryRestrictions + - dcterms:source + slot_uri: prov:wasDerivedFrom + alias: was_derived_from + owner: File 
domain_of: - - ExportControlRegulatoryRestrictions - range: Person + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string id: name: id - description: An optional identifier for this property. + description: A unique identifier for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier + identifier: true alias: id - owner: ExportControlRegulatoryRestrictions + owner: File domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -28370,21 +31525,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie + required: true name: name: name - description: A human-readable name for this property. + description: A human-readable name for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: ExportControlRegulatoryRestrictions + owner: File domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -28453,22 +31611,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string description: name: description - description: A human-readable description for this property. + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: ExportControlRegulatoryRestrictions + owner: File domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - - DatasetRelationship - Software - Person - Information @@ -28536,274 +31696,433 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions - range: string - used_software: - name: used_software - description: What software was used as part of this dataset property? - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: d4d:usedSoftware - alias: used_software - owner: ExportControlRegulatoryRestrictions - domain_of: - - DatasetProperty - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Instance - - SamplingStrategy - - MissingInfo - - Relationships - - Splits - - DataAnomaly - - DatasetBias - - DatasetLimitation - - ExternalResource - - Confidentiality - - ContentWarning - - Subpopulation - - Deidentification - - SensitiveElement - - DatasetRelationship - - InstanceAcquisition - - CollectionMechanism - - DataCollector - - CollectionTimeframe - - DirectCollection - - MissingDataDocumentation - - RawDataSource - - PreprocessingStrategy - - CleaningStrategy - - LabelingStrategy - - RawData - - ImputationProtocol - - AnnotationAnalysis - - MachineAnnotationTools - - ExistingUse - - UseRepository - - OtherTask - - FutureUseImpact - - DiscouragedUse - - IntendedUse - - ProhibitedUse - - ThirdPartySharing - - DistributionFormat - - DistributionDate - - Maintainer - - Erratum - - UpdatePlan - - RetentionLimits - - VersionAccess - - ExtensionMechanism - - EthicalReview - - DataProtectionImpact - - CollectionNotification - - CollectionConsent - - ConsentRevocation - - HumanSubjectResearch - - InformedConsent - - ParticipantPrivacy - - HumanSubjectCompensation - - 
AtRiskPopulations - - LicenseAndUseTerms - - IPRestrictions - - ExportControlRegulatoryRestrictions - range: Software + - VariableMetadata + - File + range: string + class_uri: schema:MediaObject + FileCollection: + name: FileCollection + description: A collection of files with shared characteristics (format, purpose, + structure). Represents a logical grouping of related files within a dataset, + such as all training data files, all image files, or all raw data files. Maps + to RO-Crate Dataset entities via schema:hasPart relationships. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + aliases: + - file collection + - data files + - file group + exact_mappings: + - schema:Dataset + close_mappings: + - dcat:Distribution + is_a: Information + slots: + - path + - compression + - external_resources + - resources + slot_usage: + path: + name: path + description: Path or URL to the FileCollection. May be a directory path, archive + file path, or download URL depending on how the collection is distributed. + compression: + name: compression + description: Compression format if the collection is packaged as a compressed + archive (e.g., gzip, zip, bzip2). Omit this field for uncompressed collections + or purely logical groupings. + external_resources: + name: external_resources + description: External files or URLs referenced by this file collection. + range: ExternalResource multivalued: true - inlined: true inlined_as_list: true - VariableMetadata: - name: VariableMetadata - description: Metadata describing an individual variable, field, or column in a - dataset. Variables may represent measurements, observations, derived values, - or categorical attributes. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema - exact_mappings: - - schema:PropertyValue - is_a: DatasetProperty + resources: + name: resources + description: Individual files or nested file collections within this collection. 
+ Allows hierarchical file organization with both File objects and nested + FileCollection objects. + multivalued: true + inlined_as_list: true + any_of: + - range: File + - range: FileCollection attributes: - variable_name: - name: variable_name - description: The name or identifier of the variable as it appears in the data - files. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - exact_mappings: - - schema:name - slot_uri: schema:name - alias: variable_name - owner: VariableMetadata + collection_type: + name: collection_type + description: Type(s) of content in this file collection. A collection may + have multiple types, for example a collection containing both raw_data and + documentation files would have both types listed. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/file-collection + slot_uri: d4d:collectionType + alias: collection_type + owner: FileCollection + domain_of: + - FileCollection + range: FileCollectionTypeEnum + multivalued: true + file_count: + name: file_count + description: Number of files in this collection. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/file-collection + slot_uri: d4d:fileCount + alias: file_count + owner: FileCollection + domain_of: + - FileCollection + range: integer + total_bytes: + name: total_bytes + description: Total size of all files in bytes. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema/file-collection + slot_uri: dcat:byteSize + alias: total_bytes + owner: FileCollection domain_of: - - VariableMetadata + - FileCollection + range: integer + path: + name: path + description: Path or URL to the FileCollection. May be a directory path, archive + file path, or download URL depending on how the collection is distributed. 
+ from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: schema:contentUrl + alias: path + owner: FileCollection + domain_of: + - File + - FileCollection range: string - required: true - data_type: - name: data_type - description: The data type of the variable (e.g., integer, float, string, - boolean, date, categorical). Use standard type names when possible. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - exact_mappings: - - schema:DataType - slot_uri: schema:DataType - alias: data_type - owner: VariableMetadata + compression: + name: compression + description: Compression format if the collection is packaged as a compressed + archive (e.g., gzip, zip, bzip2). Omit this field for uncompressed collections + or purely logical groupings. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:compressFormat + alias: compression + owner: FileCollection domain_of: - - VariableMetadata - range: VariableTypeEnum - unit: - name: unit - description: 'The unit of measurement for the variable, preferably using QUDT - units (http://qudt.org/vocab/unit/). Examples: qudt:Kilogram, qudt:Meter, - qudt:DegreeCelsius.' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables + - Information + - File + - FileCollection + - DatasetCollection + - Dataset + - DataSubset + range: CompressionEnum + external_resources: + name: external_resources + description: External files or URLs referenced by this file collection. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:references + alias: external_resources + owner: FileCollection + domain_of: + - Dataset + - ExternalResource + - FileCollection + - DataSubset + range: ExternalResource + multivalued: true + inlined_as_list: true + resources: + name: resources + description: Individual files or nested file collections within this collection. 
+ Allows hierarchical file organization with both File objects and nested + FileCollection objects. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: schema:hasPart + alias: resources + owner: FileCollection + domain_of: + - DatasetCollection + - Dataset + - FileCollection + - DataSubset + range: Dataset + multivalued: true + inlined_as_list: true + any_of: + - range: File + - range: FileCollection + conforms_to: + name: conforms_to + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + conforms_to_class: + name: conforms_to_class + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to_class + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + conforms_to_schema: + name: conforms_to_schema + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:conformsTo + alias: conforms_to_schema + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + created_by: + name: created_by + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:creator + alias: created_by + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + created_on: + name: created_on + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:created + alias: created_on + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + doi: + name: doi + description: digital object identifier + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:identifier + alias: doi + 
owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + pattern: 10\.\d{4,}\/.+ + download_url: + name: download_url + description: URL from which the data can be downloaded. This is not the same + as the landing page, which is a page that describes the dataset. Rather, + this URL points directly to the data itself. + from_schema: https://w3id.org/bridge2ai/data-sheets-schema exact_mappings: - - qudt:hasUnit - - schema:unitCode - slot_uri: qudt:unit - alias: unit - owner: VariableMetadata + - schema:url + slot_uri: dcat:downloadURL + alias: download_url + owner: FileCollection domain_of: - - VariableMetadata - range: uriorcurie - missing_value_code: - name: missing_value_code - description: 'Code(s) used to represent missing values for this variable. - Examples: "NA", "-999", "null", "". Multiple codes may be specified.' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: d4d:missingValueCode - alias: missing_value_code - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: uri + issued: + name: issued + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:issued + alias: issued + owner: FileCollection domain_of: - - VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + keywords: + name: keywords + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:keyword + alias: keywords + owner: FileCollection + domain_of: + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string multivalued: true - minimum_value: - name: minimum_value - description: The minimum value that the variable can take. Applicable to numeric - variables. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:minValue - alias: minimum_value - owner: VariableMetadata + language: + name: language + description: language in which the information is expressed + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - schema:inLanguage + slot_uri: dcterms:language + alias: language + owner: FileCollection domain_of: - - VariableMetadata - range: float - maximum_value: - name: maximum_value - description: The maximum value that the variable can take. Applicable to numeric - variables. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:maxValue - alias: maximum_value - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + last_updated_on: + name: last_updated_on + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:modified + alias: last_updated_on + owner: FileCollection domain_of: - - VariableMetadata - range: float - categories: - name: categories - description: The permitted categories or values for a categorical variable. - Each entry should describe a possible value and its meaning. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:valueReference - alias: categories - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: datetime + license: + name: license + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:license + alias: license + owner: FileCollection domain_of: - - VariableMetadata + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - multivalued: true - examples: - name: examples - description: Example values for this variable to illustrate typical data. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: skos:example - alias: examples - owner: VariableMetadata + modified_by: + name: modified_by + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:contributor + alias: modified_by + owner: FileCollection domain_of: - - ExistingUse - - IntendedUse - - VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - multivalued: true - is_identifier: - name: is_identifier - description: Indicates whether this variable serves as a unique identifier - or key for records in the dataset. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:identifier - alias: is_identifier - owner: VariableMetadata + page: + name: page + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcat:landingPage + alias: page + owner: FileCollection domain_of: - - VariableMetadata - range: boolean - is_sensitive: - name: is_sensitive - description: Indicates whether this variable contains sensitive information - (e.g., personal data, protected health information). - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: d4d:isSensitive - alias: is_sensitive - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + publisher: + name: publisher + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:publisher + alias: publisher + owner: FileCollection domain_of: - - VariableMetadata - range: boolean - precision: - name: precision - description: The precision or number of decimal places for numeric variables. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:valuePrecision - alias: precision - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: uriorcurie + status: + name: status + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:type + alias: status + owner: FileCollection domain_of: - - VariableMetadata - range: integer - measurement_technique: - name: measurement_technique - description: 'The technique or method used to measure this variable. Examples: - "mass spectrometry", "self-report survey", "GPS coordinates".' - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: schema:measurementTechnique - alias: measurement_technique - owner: VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File + range: string + title: + name: title + description: the official title of the element + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:title + alias: title + owner: FileCollection domain_of: - - VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - derivation: - name: derivation - description: Description of how this variable was derived or calculated from - other variables, if applicable. - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: dcterms:provenance - alias: derivation - owner: VariableMetadata + version: + name: version + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + slot_uri: dcterms:hasVersion + alias: version + owner: FileCollection domain_of: - - VariableMetadata + - Software + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - quality_notes: - name: quality_notes - description: Notes about data quality, reliability, or known issues specific - to this variable. 
- from_schema: https://w3id.org/bridge2ai/data-sheets-schema/variables - slot_uri: dcterms:description - alias: quality_notes - owner: VariableMetadata + was_derived_from: + name: was_derived_from + from_schema: https://w3id.org/bridge2ai/data-sheets-schema + exact_mappings: + - dcterms:source + slot_uri: prov:wasDerivedFrom + alias: was_derived_from + owner: FileCollection domain_of: - - VariableMetadata + - Information + - DatasetCollection + - Dataset + - DataSubset + - File range: string - multivalued: true id: name: id - description: An optional identifier for this property. + description: A unique identifier for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:identifier + identifier: true alias: id - owner: VariableMetadata + owner: FileCollection domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -28872,21 +32191,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: uriorcurie + required: true name: name: name - description: A human-readable name for this property. + description: A human-readable name for a thing. from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:name alias: name - owner: VariableMetadata + owner: FileCollection domain_of: + - NamedThing + - DatasetProperty - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - Software - Person - Information @@ -28955,22 +32277,24 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string description: name: description - description: A human-readable description for this property. + description: A human-readable description for a thing. 
from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base slot_uri: schema:description alias: description - owner: VariableMetadata + owner: FileCollection domain_of: + - NamedThing + - DatasetProperty + - DatasetRelationship - DatasetCollection - Dataset - DataSubset - - NamedThing - Organization - - DatasetProperty - - DatasetRelationship - Software - Person - Information @@ -29038,82 +32362,8 @@ classes: - LicenseAndUseTerms - IPRestrictions - ExportControlRegulatoryRestrictions + - VariableMetadata + - File range: string - used_software: - name: used_software - description: What software was used as part of this dataset property? - from_schema: https://w3id.org/bridge2ai/data-sheets-schema/base - slot_uri: d4d:usedSoftware - alias: used_software - owner: VariableMetadata - domain_of: - - DatasetProperty - - Purpose - - Task - - AddressingGap - - Creator - - FundingMechanism - - Instance - - SamplingStrategy - - MissingInfo - - Relationships - - Splits - - DataAnomaly - - DatasetBias - - DatasetLimitation - - ExternalResource - - Confidentiality - - ContentWarning - - Subpopulation - - Deidentification - - SensitiveElement - - DatasetRelationship - - InstanceAcquisition - - CollectionMechanism - - DataCollector - - CollectionTimeframe - - DirectCollection - - MissingDataDocumentation - - RawDataSource - - PreprocessingStrategy - - CleaningStrategy - - LabelingStrategy - - RawData - - ImputationProtocol - - AnnotationAnalysis - - MachineAnnotationTools - - ExistingUse - - UseRepository - - OtherTask - - FutureUseImpact - - DiscouragedUse - - IntendedUse - - ProhibitedUse - - ThirdPartySharing - - DistributionFormat - - DistributionDate - - Maintainer - - Erratum - - UpdatePlan - - RetentionLimits - - VersionAccess - - ExtensionMechanism - - EthicalReview - - DataProtectionImpact - - CollectionNotification - - CollectionConsent - - ConsentRevocation - - HumanSubjectResearch - - InformedConsent - - ParticipantPrivacy - - HumanSubjectCompensation - - 
AtRiskPopulations - - LicenseAndUseTerms - - IPRestrictions - - ExportControlRegulatoryRestrictions - range: Software - multivalued: true - inlined: true - inlined_as_list: true - class_uri: schema:PropertyValue + class_uri: dcat:Dataset source_file: src/data_sheets_schema/schema/data_sheets_schema.yaml diff --git a/src/fairscape_integration/d4d_to_fairscape.py b/src/fairscape_integration/d4d_to_fairscape.py index 9bd65969..442f41b7 100644 --- a/src/fairscape_integration/d4d_to_fairscape.py +++ b/src/fairscape_integration/d4d_to_fairscape.py @@ -6,7 +6,7 @@ import sys from pathlib import Path -from typing import Dict, Any, List, Optional +from typing import Dict, Any, List, Optional, TYPE_CHECKING from datetime import datetime # Add fairscape_models to path @@ -14,6 +14,13 @@ if fairscape_path.exists() and str(fairscape_path) not in sys.path: sys.path.insert(0, str(fairscape_path)) +if TYPE_CHECKING: + from fairscape_models.rocrate import ( + ROCrateV1_2, + ROCrateMetadataFileElem, + ROCrateMetadataElem + ) + try: from fairscape_models.rocrate import ( ROCrateV1_2, @@ -27,6 +34,10 @@ except ImportError as e: print(f"Error: Cannot import FAIRSCAPE models: {e}") FAIRSCAPE_AVAILABLE = False + # Provide stub types when not available + ROCrateV1_2 = Any # type: ignore + ROCrateMetadataFileElem = Any # type: ignore + ROCrateMetadataElem = Any # type: ignore class D4DToFairscapeConverter: @@ -39,16 +50,16 @@ def __init__(self): def convert(self, d4d_dict: Dict[str, Any]) -> ROCrateV1_2: """ Convert D4D dictionary to FAIRSCAPE RO-Crate. - + Args: d4d_dict: D4D metadata dictionary - + Returns: FAIRSCAPE ROCrateV1_2 Pydantic model """ # Build graph elements graph = [] - + # 1. Add metadata descriptor metadata_descriptor = ROCrateMetadataFileElem(**{ "@id": "ro-crate-metadata.json", @@ -57,12 +68,16 @@ def convert(self, d4d_dict: Dict[str, Any]) -> ROCrateV1_2: "about": {"@id": "./"} }) graph.append(metadata_descriptor) - - # 2. 
Add root dataset - dataset = self._build_dataset(d4d_dict) + + # 2. Build file collections (nested Datasets) + file_collections, hasPart_ids = self._build_file_collections(d4d_dict) + graph.extend(file_collections) + + # 3. Add root dataset (with hasPart references to file collections) + dataset = self._build_dataset(d4d_dict, hasPart_ids) graph.append(dataset) - - # 3. Create RO-Crate + + # 4. Create RO-Crate rocrate = ROCrateV1_2(**{ "@context": { "@vocab": "https://schema.org/", @@ -72,11 +87,20 @@ def convert(self, d4d_dict: Dict[str, Any]) -> ROCrateV1_2: }, "@graph": graph }) - + return rocrate - def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: - """Build Dataset from D4D metadata.""" + def _build_dataset(self, d4d_dict: Dict[str, Any], hasPart_ids: List[str] = None) -> ROCrateMetadataElem: + """ + Build Dataset from D4D metadata. + + Args: + d4d_dict: D4D metadata dictionary + hasPart_ids: List of @id references to FileCollection entities + + Returns: + ROCrateMetadataElem representing the root Dataset + """ # Extract author names from D4D creators (which may be complex Person objects) authors = d4d_dict.get("creators") or d4d_dict.get("author") @@ -106,7 +130,7 @@ def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: "version": d4d_dict.get("version", "1.0"), "author": author_str, "license": d4d_dict.get("license", "No license specified"), # Required field - "hasPart": [] # Required field, start with empty list + "hasPart": [{"@id": id} for id in (hasPart_ids or [])] # Add file collection references } # Add optional Schema.org fields @@ -119,8 +143,17 @@ def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: if "doi" in d4d_dict: dataset_params["identifier"] = d4d_dict["doi"] - if "bytes" in d4d_dict: - dataset_params["contentSize"] = str(d4d_dict["bytes"]) + # File properties: only add at dataset level if no file_collections exist + # (for backward compatibility with legacy files) + 
has_file_collections = bool(d4d_dict.get("file_collections")) + + if not has_file_collections: + if "bytes" in d4d_dict: + dataset_params["contentSize"] = str(d4d_dict["bytes"]) + else: + # Use aggregated total_size_bytes if available + if "total_size_bytes" in d4d_dict: + dataset_params["contentSize"] = str(d4d_dict["total_size_bytes"]) # Add EVI namespace properties (computational provenance) evi_mapping = { @@ -130,10 +163,13 @@ def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: 'schema_count': 'evi:schemaCount', 'total_entities': 'evi:totalEntities', 'distribution_formats': 'evi:formats', - 'md5': 'evi:md5', - 'sha256': 'evi:sha256', } + # Only add file-level properties if no file_collections + if not has_file_collections: + evi_mapping['md5'] = 'evi:md5' + evi_mapping['sha256'] = 'evi:sha256' + for d4d_field, evi_prop in evi_mapping.items(): if d4d_field in d4d_dict: dataset_params[evi_prop] = d4d_dict[d4d_field] @@ -170,7 +206,8 @@ def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: 'content_warnings': 'd4d:contentWarning', 'informed_consent': 'd4d:informedConsent', 'human_subject_research': 'd4d:humanSubject', - 'vulnerable_populations': 'd4d:atRiskPopulations', + 'at_risk_populations': 'd4d:atRiskPopulations', + 'vulnerable_populations': 'd4d:atRiskPopulations', # Backward compatibility } for d4d_field, d4d_prop in d4d_mapping.items(): @@ -181,7 +218,74 @@ def _build_dataset(self, d4d_dict: Dict[str, Any]) -> ROCrateMetadataElem: dataset = ROCrateMetadataElem(**dataset_params) return dataset - + + def _build_file_collections(self, d4d_dict: Dict[str, Any]) -> tuple[List[ROCrateMetadataElem], List[str]]: + """ + Build nested Dataset entities for FileCollections. 
+ + Args: + d4d_dict: D4D metadata dictionary + + Returns: + Tuple of (file_collection_elements, hasPart_ids) + """ + file_collections = [] + hasPart_ids = [] + + # Get file_collections from D4D + collections_list = d4d_dict.get("file_collections", []) + + for idx, fc in enumerate(collections_list): + if not isinstance(fc, dict): + continue + + # Build @id for this collection + collection_id = fc.get("id") or f"#collection-{idx + 1}" + hasPart_ids.append(collection_id) + + # Build nested Dataset parameters + collection_params = { + "@id": collection_id, + "@type": ["Dataset"], # Must be a list + "name": fc.get("name") or fc.get("title") or f"File Collection {idx + 1}", + "description": fc.get("description") or "File collection", + # Required fields for ROCrateMetadataElem + "keywords": fc.get("keywords", []), + "version": fc.get("version", "1.0"), + "author": fc.get("author", "Unknown"), + "license": fc.get("license", "Unspecified"), + "hasPart": [] + } + + # Map FileCollection properties to RO-Crate Dataset properties + # Note: format, bytes, encoding, media_type, sha256, md5, dialect, hash + # are now file-level properties (on File objects), not FileCollection properties + + # Collection-level aggregate size + if "total_bytes" in fc: + collection_params["contentSize"] = str(fc["total_bytes"]) + + # Collection location/URL + if "path" in fc: + collection_params["contentUrl"] = fc["path"] + + # Collection compression (if packaged as archive) + if "compression" in fc: + collection_params["fileFormat"] = fc["compression"] + + # D4D-specific collection properties + if "collection_type" in fc: + collection_params["d4d:collectionType"] = fc["collection_type"] + + if "file_count" in fc: + collection_params["d4d:fileCount"] = fc["file_count"] + + # Create nested Dataset element + collection_elem = ROCrateMetadataElem(**collection_params) + file_collections.append(collection_elem) + + return file_collections, hasPart_ids + def validate(self, rocrate: ROCrateV1_2) -> 
tuple[bool, Optional[List[str]]]: """ Validate FAIRSCAPE RO-Crate. diff --git a/src/fairscape_integration/fairscape_to_d4d.py b/src/fairscape_integration/fairscape_to_d4d.py index 10008673..e8dae984 100644 --- a/src/fairscape_integration/fairscape_to_d4d.py +++ b/src/fairscape_integration/fairscape_to_d4d.py @@ -108,21 +108,31 @@ def convert(self, rocrate_input: Any) -> Dict[str, Any]: except Exception as e: print(f"⚠ Warning: RO-Crate validation failed: {e}") - # Extract Dataset entity from @graph - dataset = self._extract_dataset(rocrate_data) + # Extract Dataset entity and nested Datasets from @graph + dataset, nested_datasets = self._extract_datasets(rocrate_data) if not dataset: raise ValueError("No Dataset entity found in RO-Crate @graph") # Convert to D4D - d4d_dict = self._build_d4d(dataset, rocrate_data) + d4d_dict = self._build_d4d(dataset, nested_datasets, rocrate_data) return d4d_dict - def _extract_dataset(self, rocrate_data: Dict) -> Optional[Dict]: - """Extract Dataset entity from RO-Crate @graph.""" + def _extract_datasets(self, rocrate_data: Dict) -> Tuple[Optional[Dict], List[Dict]]: + """ + Extract main Dataset and nested Datasets from RO-Crate @graph. 
+ + Returns: + Tuple of (main_dataset, nested_datasets_list) + """ graph = rocrate_data.get('@graph', []) + main_dataset = None + nested_datasets = [] + hasPart_ids = set() + + # First pass: find main dataset and collect hasPart references for entity in graph: entity_type = entity.get('@type', []) if isinstance(entity_type, str): @@ -132,25 +142,84 @@ def _extract_dataset(self, rocrate_data: Dict) -> Optional[Dict]: # Skip metadata descriptor if entity.get('@id') == 'ro-crate-metadata.json': continue - return entity - return None + # Main dataset is the root with @id "./" or has ROCrate type + entity_id = entity.get('@id', '') + if entity_id == './' or 'https://w3id.org/EVI#ROCrate' in entity_type: + main_dataset = entity + # Collect hasPart references + has_part = entity.get('hasPart', []) + for part in has_part: + if isinstance(part, dict) and '@id' in part: + hasPart_ids.add(part['@id']) + elif isinstance(part, str): + hasPart_ids.add(part) + + # Second pass: collect nested datasets (those referenced by hasPart) + for entity in graph: + entity_type = entity.get('@type', []) + if isinstance(entity_type, str): + entity_type = [entity_type] + + if 'Dataset' in entity_type: + entity_id = entity.get('@id', '') + if entity_id in hasPart_ids: + nested_datasets.append(entity) + + return main_dataset, nested_datasets + + def _build_d4d(self, dataset: Dict, nested_datasets: List[Dict], full_rocrate: Dict) -> Dict[str, Any]: + """ + Build D4D dictionary from RO-Crate Dataset entity. 
+ + Args: + dataset: Main Dataset entity + nested_datasets: Nested Dataset entities (FileCollections) + full_rocrate: Full RO-Crate data - def _build_d4d(self, dataset: Dict, full_rocrate: Dict) -> Dict[str, Any]: - """Build D4D dictionary from RO-Crate Dataset entity.""" + Returns: + D4D dictionary + """ d4d = { # Required D4D metadata - 'schema_version': '1.0', + 'schema_version': '1.1', # Updated to 1.1 for FileCollection support 'generated_date': datetime.now().isoformat(), 'source': 'FAIRSCAPE RO-Crate', } # Map basic properties - d4d.update(self._map_basic_properties(dataset)) + basic_props = self._map_basic_properties(dataset) + + # Convert nested Datasets to FileCollections + has_file_collections = False + if nested_datasets: + file_collections = self._build_file_collections(nested_datasets) + if file_collections: + d4d['file_collections'] = file_collections + has_file_collections = True + + # For schema 1.1 with file_collections: map contentSize to total_size_bytes + if 'bytes' in basic_props: + basic_props['total_size_bytes'] = basic_props.pop('bytes') + + d4d.update(basic_props) + + # Map complex properties (skip hasPart mapping if we have file_collections) + complex_props = self._map_complex_properties(dataset) + if has_file_collections and 'resources' in complex_props: + # Filter out resources that are already in file_collections + fc_ids = {fc.get('id') for fc in d4d.get('file_collections', [])} + if isinstance(complex_props['resources'], list): + complex_props['resources'] = [ + r for r in complex_props['resources'] + if r not in fc_ids + ] + # Remove resources if empty + if not complex_props['resources']: + del complex_props['resources'] - # Map complex properties - d4d.update(self._map_complex_properties(dataset)) + d4d.update(complex_props) # Map EVI properties (computational provenance) d4d.update(self._map_evi_properties(dataset)) @@ -163,6 +232,66 @@ def _build_d4d(self, dataset: Dict, full_rocrate: Dict) -> Dict[str, Any]: return d4d + def 
_build_file_collections(self, nested_datasets: List[Dict]) -> List[Dict[str, Any]]: + """ + Convert nested RO-Crate Datasets to D4D FileCollections. + + Args: + nested_datasets: List of nested Dataset entities from RO-Crate + + Returns: + List of FileCollection dictionaries + """ + file_collections = [] + + for dataset in nested_datasets: + collection = {} + + # Map basic properties + if '@id' in dataset: + collection['id'] = dataset['@id'] + + if 'name' in dataset: + collection['name'] = dataset['name'] + + if 'description' in dataset: + collection['description'] = dataset['description'] + + # Map collection-level properties + # Note: encodingFormat, sha256, md5, format, bytes, encoding are now + # file-level properties (on File objects), not FileCollection properties + + if 'contentSize' in dataset: + # Parse size string to total_bytes (aggregate size) + size_str = dataset['contentSize'] + if isinstance(size_str, str): + collection['total_bytes'] = self._parse_size(size_str) + else: + collection['total_bytes'] = size_str + + if 'contentUrl' in dataset: + collection['path'] = dataset['contentUrl'] + + if 'fileFormat' in dataset: + collection['compression'] = dataset['fileFormat'] + + # Map D4D-specific properties + if 'd4d:collectionType' in dataset: + # collection_type is multivalued, wrap scalar as array + collection_type = dataset['d4d:collectionType'] + collection['collection_type'] = ( + collection_type if isinstance(collection_type, list) else [collection_type] + ) + + if 'd4d:fileCount' in dataset: + collection['file_count'] = dataset['d4d:fileCount'] + + # Only add non-empty collections + if collection: + file_collections.append(collection) + + return file_collections + def _map_basic_properties(self, dataset: Dict) -> Dict[str, Any]: """Map basic Schema.org properties to D4D.""" diff --git a/src/validation/unified_validator.py b/src/validation/unified_validator.py index e5d6d766..15614f2c 100644 --- a/src/validation/unified_validator.py +++ 
b/src/validation/unified_validator.py @@ -157,6 +157,97 @@ def _default_shapes_dir(self) -> Path: """Get default SHACL shapes directory.""" return Path("data/ro-crate/profiles/shapes") + # ========================================================================= + # Migration Support + # ========================================================================= + + @staticmethod + def migrate_legacy_file_properties(data: Dict[str, Any]) -> tuple[Dict[str, Any], List[str]]: + """ + Migrate legacy D4D files with file properties at Dataset level. + + Detects if Dataset has file properties (bytes, path, format, etc.) + and no file_collections. If so, creates a single FileCollection + with those properties. + + Args: + data: Parsed D4D data dictionary + + Returns: + Tuple of (migrated_data, warnings) + """ + warnings = [] + + # File properties that should be on File objects (not FileCollection) + file_level_props = ['format', 'encoding', 'media_type', 'hash', 'md5', 'sha256', 'dialect'] + # Collection properties that stay on FileCollection + collection_props = ['path', 'compression'] + # Size property needs special handling (bytes → total_bytes) + + # Check if migration needed + all_legacy_props = file_level_props + collection_props + ['bytes'] + has_file_props = any(k in data for k in all_legacy_props) + has_collections = 'file_collections' in data and data['file_collections'] + + if has_file_props and not has_collections: + # Create default file collection + file_collection = { + 'id': f"{data.get('id', 'dataset')}-files", + 'name': "Dataset Files", + 'description': "Migrated from legacy dataset file properties" + } + + # Create a File object for file-level properties + file_obj = { + 'id': f"{data.get('id', 'dataset')}-file", + 'file_type': 'data_file' + } + + # Track migrated properties for warning + migrated_props = [] + + # Move file-level properties to File object + for prop in file_level_props: + if prop in data: + file_obj[prop] = data.pop(prop) + 
migrated_props.append(prop) + + # Move collection-level properties to FileCollection + for prop in collection_props: + if prop in data: + file_collection[prop] = data.pop(prop) + migrated_props.append(prop) + + # Handle bytes → total_bytes conversion + if 'bytes' in data: + # Put bytes on File object + file_obj['bytes'] = data.pop('bytes') + # Set total_bytes on FileCollection (same value for single file) + file_collection['total_bytes'] = file_obj['bytes'] + migrated_props.append('bytes') + + # Add File to collection resources if it has any properties + if any(k in file_obj for k in file_level_props + ['bytes']): + file_collection['resources'] = [file_obj] + file_collection['file_count'] = 1 + + # Add collection + data['file_collections'] = [file_collection] + + # Create warning message + warning_msg = ( + f"DEPRECATION: File properties ({', '.join(migrated_props)}) at Dataset level are deprecated. " + f"Use file_collections with File objects instead. Automatically migrated to FileCollection with File resources. " + f"This automatic migration will be removed in schema version 2.0." 
+ ) + warnings.append(warning_msg) + + # Update schema version if present + if 'schema_version' in data: + data['schema_version'] = '1.1' + + return data, warnings + # ========================================================================= # Level 1: Syntax Validation # ========================================================================= @@ -289,11 +380,40 @@ def _validate_d4d_semantic( return report try: + # Load and potentially migrate data + with open(input_path, 'r') as f: + data = yaml.safe_load(f) + + # Apply migration if needed + migrated_data, migration_warnings = self.migrate_legacy_file_properties(data) + + # Add migration warnings to report + report.warnings.extend(migration_warnings) + + # If migrated, write to temp file for validation + if migration_warnings: + import tempfile + temp_file = tempfile.NamedTemporaryFile( + mode='w', + suffix='.yaml', + delete=False + ) + try: + yaml.dump(migrated_data, temp_file, default_flow_style=False, sort_keys=False) + temp_file.close() + validation_path = Path(temp_file.name) + report.info.append("Validating migrated data (legacy file properties → FileCollection)") + except Exception as e: + report.errors.append(f"Failed to write migrated data: {e}") + return report + else: + validation_path = input_path + # Use linkml-validate command cmd = [ "linkml-validate", "-s", str(self.schema_path), - str(input_path) + str(validation_path) ] if target_class: @@ -306,6 +426,13 @@ def _validate_d4d_semantic( timeout=30 ) + # Clean up temp file if created + if migration_warnings and validation_path != input_path: + try: + validation_path.unlink() + except Exception: + pass # Best effort cleanup + if result.returncode == 0: report.info.append("D4D schema validation passed") else: @@ -324,6 +451,12 @@ def _validate_d4d_semantic( except subprocess.TimeoutExpired: report.passed = False report.errors.append("Validation timeout (>30 seconds)") + # Clean up temp file if created + if migration_warnings and validation_path != 
#!/usr/bin/env python3
# Source file: tests/test_file_collection.py
"""
Unit tests for FileCollection class.

Tests FileCollection validation, property constraints, and integration
with Dataset class.

These are structural tests on plain dictionaries; nothing in this module
runs the fixtures through linkml-validate.
"""

import unittest
import yaml
from pathlib import Path
import tempfile


# Permissible values of FileCollectionTypeEnum that the fixtures may use.
FILE_COLLECTION_TYPES = (
    'raw_data',
    'processed_data',
    'training_split',
    'test_split',
    'validation_split',
    'documentation',
    'metadata',
    'code',
    'supplementary',
    'other',
)


def assert_known_collection_types(test_case, collection):
    """Fail unless ``collection['collection_type']`` is a list of known values.

    The list form is the one the RO-Crate conversion tests in this change
    expect, so every fixture here uses it too.
    """
    declared = collection['collection_type']
    test_case.assertIsInstance(declared, list)
    for value in declared:
        test_case.assertIn(value, FILE_COLLECTION_TYPES)


class TestFileCollection(unittest.TestCase):
    """Test FileCollection class validation and properties."""

    def test_filecollection_basic_validation(self):
        """A minimal FileCollection carries an id and a known collection type."""
        filecollection_data = {
            'id': 'test-collection-1',
            'name': 'Training Data',
            'description': 'Training dataset files',
            'collection_type': ['training_split'],
            'total_bytes': 1048576,
            'file_count': 100,
        }

        self.assertIn('id', filecollection_data)
        self.assertEqual(filecollection_data['collection_type'], ['training_split'])
        assert_known_collection_types(self, filecollection_data)

    def test_dataset_with_file_collections(self):
        """Test Dataset containing FileCollections."""
        dataset_data = {
            'id': 'test-dataset',
            'title': 'Test Dataset',
            'description': 'A dataset with file collections',
            'file_collections': [
                {
                    'id': 'collection-1',
                    'name': 'Training Files',
                    'collection_type': ['training_split'],
                    'total_bytes': 1048576,
                },
                {
                    'id': 'collection-2',
                    'name': 'Test Files',
                    'collection_type': ['test_split'],
                    'total_bytes': 524288,
                },
            ],
            'total_file_count': 200,
            'total_size_bytes': 1572864,
        }
        collections = dataset_data['file_collections']

        self.assertEqual(len(collections), 2)
        self.assertEqual(dataset_data['total_file_count'], 200)
        # The dataset-level size duplicates the per-collection sizes; make
        # sure the fixture cannot drift out of sync with itself.
        self.assertEqual(
            dataset_data['total_size_bytes'],
            sum(collection['total_bytes'] for collection in collections),
        )
        for collection in collections:
            with self.subTest(collection=collection['id']):
                assert_known_collection_types(self, collection)

    def test_filecollection_enum_values(self):
        """Test FileCollectionTypeEnum permissible values."""
        # A duplicated entry would silently shrink the coverage of the loop.
        self.assertEqual(len(set(FILE_COLLECTION_TYPES)), len(FILE_COLLECTION_TYPES))

        for collection_type in FILE_COLLECTION_TYPES:
            # subTest reports every failing value instead of stopping at the first.
            with self.subTest(collection_type=collection_type):
                collection = {
                    'id': f'collection-{collection_type}',
                    'name': f'{collection_type} files',
                    'collection_type': [collection_type],
                }
                self.assertEqual(collection['collection_type'], [collection_type])
                assert_known_collection_types(self, collection)

    def test_filecollection_properties_complete(self):
        """Test FileCollection with all collection-level properties."""
        complete_collection = {
            'id': 'complete-collection',
            'name': 'Complete File Collection',
            'description': 'A collection with all properties',
            'collection_type': ['processed_data'],
            'total_bytes': 2097152,
            'file_count': 50,
            'path': '/data/processed/',
            'compression': 'gzip',
            'resources': [
                {
                    'id': 'file001.json',
                    'file_type': 'data_file',
                    'format': 'JSON',
                    'bytes': 41943,
                    'encoding': 'UTF-8',
                    'media_type': 'application/json',
                    'md5': 'abc123',
                    'sha256': 'def456789',
                }
            ],
        }

        # Collection-level properties must all be present.
        expected_props = ['id', 'name', 'description', 'collection_type',
                          'total_bytes', 'file_count', 'path', 'compression', 'resources']
        missing = [prop for prop in expected_props if prop not in complete_collection]
        self.assertEqual(missing, [])
        assert_known_collection_types(self, complete_collection)

        # File-level properties live on the entries of `resources`.
        self.assertEqual(len(complete_collection['resources']), 1)
        file_obj = complete_collection['resources'][0]
        self.assertEqual(file_obj['format'], 'JSON')
        self.assertEqual(file_obj['bytes'], 41943)

    def test_nested_file_collections(self):
        """Test FileCollection can contain nested FileCollections via resources."""
        parent_collection = {
            'id': 'parent-collection',
            'name': 'Parent Collection',
            'resources': [
                {
                    'id': 'child-collection-1',
                    'name': 'Child Collection 1',
                    'collection_type': ['raw_data'],
                },
                {
                    'id': 'child-collection-2',
                    'name': 'Child Collection 2',
                    'collection_type': ['processed_data'],
                },
            ],
        }
        children = parent_collection['resources']

        self.assertEqual(len(children), 2)
        self.assertEqual(children[0]['collection_type'], ['raw_data'])
        self.assertEqual(children[1]['collection_type'], ['processed_data'])
        for child in children:
            with self.subTest(child=child['id']):
                assert_known_collection_types(self, child)

    def test_dataset_without_file_collections_still_valid(self):
        """Test that Dataset without file_collections is still valid."""
        legacy_dataset = {
            'id': 'legacy-dataset',
            'title': 'Legacy Dataset',
            'description': 'Dataset without file collections',
            'purposes': [],
            'creators': [],
        }

        self.assertNotIn('file_collections', legacy_dataset)
        self.assertIn('id', legacy_dataset)


class TestFileCollectionYAMLGeneration(unittest.TestCase):
    """Test FileCollection YAML generation and validation."""

    def test_generate_yaml_with_filecollection(self):
        """Test generating valid D4D YAML with FileCollection."""
        d4d_data = {
            'id': 'https://example.org/dataset/123',
            'title': 'Example Dataset',
            'description': 'A dataset with file collections',
            'file_collections': [
                {
                    'id': 'collection-raw',
                    'name': 'Raw Data Files',
                    'description': 'Unprocessed sensor data',
                    'collection_type': ['raw_data'],
                    'total_bytes': 5242880,
                    'file_count': 150,
                    'path': '/data/raw/',
                    'resources': [
                        {
                            'id': 'raw001.csv',
                            'file_type': 'data_file',
                            'format': 'CSV',
                            'bytes': 34952,
                            'encoding': 'UTF-8',
                            'sha256': 'a1b2c3d4e5f6',
                        }
                    ],
                },
                {
                    'id': 'collection-processed',
                    'name': 'Processed Data Files',
                    'description': 'Cleaned and normalized data',
                    'collection_type': ['processed_data'],
                    'total_bytes': 3145728,
                    'file_count': 100,
                    'path': '/data/processed/',
                    'compression': 'gzip',
                    'resources': [
                        {
                            'id': 'processed001.json',
                            'file_type': 'data_file',
                            'format': 'JSON',
                            'bytes': 31457,
                            'encoding': 'UTF-8',
                            'sha256': 'g7h8i9j0k1l2',
                        }
                    ],
                },
            ],
            'total_file_count': 250,
            'total_size_bytes': 8388608,
        }
        collections = d4d_data['file_collections']

        # Dataset-level aggregates must agree with the collections they summarize.
        self.assertEqual(
            d4d_data['total_file_count'],
            sum(collection['file_count'] for collection in collections),
        )
        self.assertEqual(
            d4d_data['total_size_bytes'],
            sum(collection['total_bytes'] for collection in collections),
        )

        # safe_dump pairs with safe_load below: only standard YAML tags are emitted.
        yaml_str = yaml.safe_dump(d4d_data, default_flow_style=False, sort_keys=False)
        parsed = yaml.safe_load(yaml_str)

        self.assertEqual(parsed['id'], d4d_data['id'])
        self.assertEqual(len(parsed['file_collections']), 2)
        self.assertEqual(parsed['file_collections'][0]['collection_type'], ['raw_data'])
        self.assertEqual(parsed['total_file_count'], 250)
        # The whole document must survive the round trip, not just a few keys,
        # and sort_keys=False must keep the authoring order of the top level.
        self.assertEqual(parsed, d4d_data)
        self.assertEqual(list(parsed), list(d4d_data))
        for collection in parsed['file_collections']:
            with self.subTest(collection=collection['id']):
                assert_known_collection_types(self, collection)

    def test_write_and_read_filecollection_yaml(self):
        """Test writing and reading FileCollection YAML file."""
        d4d_data = {
            'id': 'test-dataset',
            'title': 'Test Dataset',
            'file_collections': [
                {
                    'id': 'test-collection',
                    'name': 'Test Files',
                    'collection_type': ['test_split'],
                    'total_bytes': 1024,
                    # format/bytes describe a single file, so they belong on
                    # a resource entry rather than on the collection itself.
                    'resources': [
                        {
                            'id': 'test001.csv',
                            'file_type': 'data_file',
                            'format': 'CSV',
                            'bytes': 1024,
                        }
                    ],
                }
            ],
        }

        # TemporaryDirectory removes the file even when dumping raises, which
        # a delete=False temp file cleaned up in a later try/finally does not.
        with tempfile.TemporaryDirectory() as tmp_dir:
            yaml_path = Path(tmp_dir) / 'filecollection.yaml'

            with yaml_path.open('w', encoding='utf-8') as handle:
                yaml.safe_dump(d4d_data, handle, default_flow_style=False)

            with yaml_path.open(encoding='utf-8') as handle:
                loaded_data = yaml.safe_load(handle)

        self.assertEqual(loaded_data['id'], 'test-dataset')
        self.assertEqual(loaded_data['file_collections'][0]['name'], 'Test Files')
        self.assertEqual(loaded_data, d4d_data)


if __name__ == '__main__':
    unittest.main()
#!/usr/bin/env python3
# Source file: tests/test_legacy_migration.py
"""
Tests for legacy file property migration.

Tests that D4D files with file properties at Dataset level are automatically
migrated to use FileCollection.
"""

import copy
import sys
import unittest
from pathlib import Path

# Add the repository root to sys.path so `src` resolves when this file is run
# directly (it has a __main__ guard); same approach as the RO-Crate tests.
repo_root = Path(__file__).parent.parent
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

from src.validation.unified_validator import UnifiedValidator


# Dataset-level keys the migration test expects to be moved off the dataset.
LEGACY_FILE_PROPERTIES = (
    'bytes',
    'path',
    'format',
    'encoding',
    'compression',
    'md5',
    'sha256',
)


class TestLegacyMigration(unittest.TestCase):
    """Test legacy file property migration."""

    def setUp(self):
        """Create a fresh validator for every test."""
        self.validator = UnifiedValidator()

    def test_migrate_legacy_file_properties(self):
        """Test migration of file properties to FileCollection."""
        # Legacy data with file properties at Dataset level
        legacy_data = {
            'id': 'test-dataset',
            'name': 'Test Dataset',
            'bytes': 1048576,
            'path': '/data/test.csv',
            'format': 'CSV',
            'encoding': 'UTF-8',
            'compression': 'gzip',
            'md5': 'abc123',
            'sha256': 'def456',
        }

        migrated_data, warnings = self.validator.migrate_legacy_file_properties(legacy_data)

        # Exactly one deprecation warning that points at the new location
        self.assertEqual(len(warnings), 1)
        self.assertIn('DEPRECATION', warnings[0])
        self.assertIn('file_collections', warnings[0])

        # File properties moved into a single generated collection
        self.assertIn('file_collections', migrated_data)
        self.assertEqual(len(migrated_data['file_collections']), 1)
        file_collection = migrated_data['file_collections'][0]

        # Collection-level properties
        self.assertEqual(file_collection['total_bytes'], 1048576)  # bytes → total_bytes
        self.assertEqual(file_collection['path'], '/data/test.csv')
        self.assertEqual(file_collection['compression'], 'gzip')
        self.assertEqual(file_collection['file_count'], 1)

        # File-level properties should be in resources
        self.assertIn('resources', file_collection)
        self.assertEqual(len(file_collection['resources']), 1)

        file_obj = file_collection['resources'][0]
        self.assertEqual(file_obj['bytes'], 1048576)
        self.assertEqual(file_obj['format'], 'CSV')
        self.assertEqual(file_obj['encoding'], 'UTF-8')
        self.assertEqual(file_obj['md5'], 'abc123')
        self.assertEqual(file_obj['sha256'], 'def456')
        self.assertEqual(file_obj['file_type'], 'data_file')

        # File properties removed from dataset level; subTest names every
        # leftover key instead of stopping at the first one.
        for legacy_key in LEGACY_FILE_PROPERTIES:
            with self.subTest(legacy_key=legacy_key):
                self.assertNotIn(legacy_key, migrated_data)

        # Dataset-level properties preserved
        self.assertEqual(migrated_data['id'], 'test-dataset')
        self.assertEqual(migrated_data['name'], 'Test Dataset')

    def test_no_migration_when_file_collections_present(self):
        """Test that migration doesn't happen if file_collections already exists."""
        data_with_collections = {
            'id': 'test-dataset',
            'name': 'Test Dataset',
            'bytes': 1048576,  # File property present
            'file_collections': [  # But file_collections also present
                {
                    'id': 'collection-1',
                    'name': 'Data Files',
                    'format': 'JSON',
                }
            ],
        }
        # Snapshot first: if the validator edits its argument in place and
        # returns it, comparing the result with the argument proves nothing.
        expected = copy.deepcopy(data_with_collections)

        migrated_data, warnings = self.validator.migrate_legacy_file_properties(data_with_collections)

        # No migration should happen
        self.assertEqual(len(warnings), 0)
        self.assertEqual(migrated_data, expected)

    def test_no_migration_when_no_file_properties(self):
        """Test that migration doesn't happen if no file properties present."""
        clean_data = {
            'id': 'test-dataset',
            'name': 'Test Dataset',
            'description': 'A clean dataset without legacy file properties',
        }
        # Same reasoning as above: compare against a pre-call copy.
        expected = copy.deepcopy(clean_data)

        migrated_data, warnings = self.validator.migrate_legacy_file_properties(clean_data)

        # No migration should happen
        self.assertEqual(len(warnings), 0)
        self.assertEqual(migrated_data, expected)

    def test_migration_preserves_collection_metadata(self):
        """Test that migrated FileCollection has correct metadata."""
        legacy_data = {
            'id': 'test-dataset',
            'bytes': 2048,
            'format': 'CSV',
        }

        # Warnings are covered by the other tests; only the metadata matters here.
        migrated_data, _ = self.validator.migrate_legacy_file_properties(legacy_data)

        file_collection = migrated_data['file_collections'][0]
        self.assertEqual(file_collection['id'], 'test-dataset-files')
        self.assertEqual(file_collection['name'], 'Dataset Files')
        self.assertIn('Migrated from legacy', file_collection['description'])

    def test_migration_handles_partial_file_properties(self):
        """Test migration with only some file properties present."""
        partial_data = {
            'id': 'test-dataset',
            'bytes': 4096,
            'format': 'JSON',
            # No md5, sha256, compression, etc.
        }

        migrated_data, warnings = self.validator.migrate_legacy_file_properties(partial_data)

        # Migration should still happen
        self.assertEqual(len(warnings), 1)
        file_collection = migrated_data['file_collections'][0]

        # Collection should have total_bytes
        self.assertEqual(file_collection['total_bytes'], 4096)
        self.assertEqual(file_collection['file_count'], 1)

        # File-level properties should be in resources
        self.assertIn('resources', file_collection)
        file_obj = file_collection['resources'][0]
        self.assertEqual(file_obj['bytes'], 4096)
        self.assertEqual(file_obj['format'], 'JSON')

        # Properties not present should not be in File object
        self.assertNotIn('md5', file_obj)
        self.assertNotIn('sha256', file_obj)


if __name__ == '__main__':
    unittest.main()
#!/usr/bin/env python3
# Source file: tests/test_rocrate_file_collection.py
"""
Integration tests for FileCollection RO-Crate transformations.

Tests bidirectional transformation between D4D FileCollection and
RO-Crate nested Dataset entities.
"""

import unittest
from pathlib import Path
import sys

# Add the repository root to sys.path so the `src` package can be imported
repo_root = Path(__file__).parent.parent
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

try:
    from src.fairscape_integration.d4d_to_fairscape import D4DToFairscapeConverter
    from src.fairscape_integration.fairscape_to_d4d import FairscapeToD4DConverter
except ImportError:
    FAIRSCAPE_AVAILABLE = False
else:
    # Importing is not enough: construct a converter once to check that the
    # FAIRSCAPE models are actually usable. The probe instance is discarded
    # rather than kept as a module global. ImportError is caught here as well
    # so a failure during construction still disables the tests instead of
    # breaking test collection.
    try:
        D4DToFairscapeConverter()
    except (RuntimeError, ImportError):
        FAIRSCAPE_AVAILABLE = False
    else:
        FAIRSCAPE_AVAILABLE = True


ROOT_DATASET_ID = './'
METADATA_DESCRIPTOR_ID = 'ro-crate-metadata.json'


def entity_types(entity):
    """Return the entity's ``@type`` as a list (it may be a string or a list)."""
    declared = entity.get('@type', [])
    if isinstance(declared, str):
        return [declared]
    return declared


def find_entity(entities, entity_id, key='@id'):
    """Return the first dict in ``entities`` whose ``key`` equals ``entity_id``.

    Returns None when nothing matches, so callers can assertIsNotNone on it.
    """
    return next((entity for entity in entities if entity.get(key) == entity_id), None)


@unittest.skipIf(not FAIRSCAPE_AVAILABLE, "FAIRSCAPE integration not available")
class TestFileCollectionROCrateTransformation(unittest.TestCase):
    """Test D4D ↔ RO-Crate transformations with FileCollection."""

    def setUp(self):
        """Set up converters."""
        self.d4d_to_rocrate = D4DToFairscapeConverter()
        self.rocrate_to_d4d = FairscapeToD4DConverter()

    def _to_rocrate_dict(self, d4d_data):
        """Convert D4D data and dump the resulting RO-Crate model to a dict."""
        rocrate = self.d4d_to_rocrate.convert(d4d_data)
        return rocrate.model_dump(by_alias=True, exclude_none=True)

    def test_d4d_to_rocrate_with_filecollections(self):
        """Test D4D with FileCollections converts to RO-Crate with nested Datasets."""
        d4d_data = {
            'id': 'test-dataset',
            'title': 'Test Dataset',
            'description': 'A dataset with file collections',
            'version': '1.0',
            'license': 'MIT',
            'file_collections': [
                {
                    'id': 'collection-1',
                    'name': 'Training Files',
                    'description': 'Training data files',
                    'collection_type': 'training_split',
                    'total_bytes': 1048576,
                    'file_count': 100,
                    'resources': [
                        {
                            'id': 'train001.csv',
                            'file_type': 'data_file',
                            'format': 'CSV',
                            'bytes': 10485,
                            'sha256': 'abc123',
                        }
                    ],
                },
                {
                    'id': 'collection-2',
                    'name': 'Test Files',
                    'description': 'Test data files',
                    'collection_type': 'test_split',
                    'total_bytes': 524288,
                    'file_count': 50,
                    'resources': [
                        {
                            'id': 'test001.json',
                            'file_type': 'data_file',
                            'format': 'JSON',
                            'bytes': 10485,
                            'md5': 'def456',
                        }
                    ],
                },
            ],
            'total_file_count': 150,
            'total_size_bytes': 1572864,
        }

        graph = self._to_rocrate_dict(d4d_data)['@graph']

        # Should have: metadata descriptor + root dataset + 2 file collections = 4 entities
        self.assertEqual(len(graph), 4)

        root_dataset = find_entity(graph, ROOT_DATASET_ID)
        # Nested datasets are every Dataset-typed entity other than the root
        # and the metadata descriptor.
        nested_datasets = [
            entity for entity in graph
            if entity.get('@id') not in (ROOT_DATASET_ID, METADATA_DESCRIPTOR_ID)
            and 'Dataset' in entity_types(entity)
        ]

        # Verify root dataset
        self.assertIsNotNone(root_dataset)
        self.assertEqual(root_dataset['name'], 'Test Dataset')
        self.assertIn('hasPart', root_dataset)
        self.assertEqual(len(root_dataset['hasPart']), 2)

        # Verify nested datasets (file collections)
        self.assertEqual(len(nested_datasets), 2)

        training_collection = find_entity(nested_datasets, 'collection-1')
        self.assertIsNotNone(training_collection)
        self.assertEqual(training_collection['name'], 'Training Files')
        # Note: encodingFormat and sha256 are now on individual File objects, not FileCollection
        # FileCollection has aggregate total_bytes which maps to contentSize
        self.assertEqual(training_collection.get('contentSize'), '1048576')
        self.assertEqual(training_collection['d4d:fileCount'], 100)

    def test_rocrate_to_d4d_with_nested_datasets(self):
        """Test RO-Crate with nested Datasets converts to D4D with FileCollections."""
        rocrate_data = {
            '@context': {
                '@vocab': 'https://schema.org/',
                'd4d': 'https://w3id.org/bridge2ai/data-sheets-schema/',
            },
            '@graph': [
                {
                    '@id': 'ro-crate-metadata.json',
                    '@type': 'CreativeWork',
                    'conformsTo': {'@id': 'https://w3id.org/ro/crate/1.2'},
                    'about': {'@id': './'},
                },
                {
                    '@id': './',
                    '@type': ['Dataset', 'https://w3id.org/EVI#ROCrate'],
                    'name': 'Test Dataset',
                    'description': 'A dataset with nested datasets',
                    'version': '1.0',
                    'license': 'MIT',
                    'author': 'Test Author',
                    'hasPart': [
                        {'@id': '#files-raw'},
                        {'@id': '#files-processed'},
                    ],
                },
                {
                    '@id': '#files-raw',
                    '@type': 'Dataset',
                    'name': 'Raw Data Files',
                    'description': 'Unprocessed data',
                    'encodingFormat': 'CSV',
                    'contentSize': '2097152',
                    'sha256': 'raw123',
                    'd4d:collectionType': 'raw_data',
                    'd4d:fileCount': 200,
                },
                {
                    '@id': '#files-processed',
                    '@type': 'Dataset',
                    'name': 'Processed Data Files',
                    'description': 'Cleaned data',
                    'encodingFormat': 'JSON',
                    'contentSize': '1048576',
                    'md5': 'proc456',
                    'd4d:collectionType': 'processed_data',
                    'd4d:fileCount': 100,
                },
            ],
        }

        d4d_data = self.rocrate_to_d4d.convert(rocrate_data)

        # Verify D4D structure
        self.assertEqual(d4d_data['title'], 'Test Dataset')
        self.assertIn('file_collections', d4d_data)
        self.assertEqual(len(d4d_data['file_collections']), 2)

        raw_collection = find_entity(d4d_data['file_collections'], '#files-raw', key='id')
        self.assertIsNotNone(raw_collection)
        self.assertEqual(raw_collection['name'], 'Raw Data Files')
        # Note: format, bytes, sha256 are now file-level properties, not collection-level
        # Collection has aggregate total_bytes from RO-Crate contentSize
        self.assertEqual(raw_collection.get('total_bytes'), 2097152)
        self.assertEqual(raw_collection['collection_type'], ['raw_data'])
        self.assertEqual(raw_collection['file_count'], 200)

    def test_roundtrip_preservation(self):
        """Test D4D → RO-Crate → D4D preserves FileCollection structure."""
        original_d4d = {
            'id': 'roundtrip-dataset',
            'title': 'Round-trip Test Dataset',
            'description': 'Testing round-trip transformation',
            'version': '1.0',
            'license': 'Apache-2.0',
            'file_collections': [
                {
                    'id': 'test-collection',
                    'name': 'Test Files',
                    'description': 'Test data',
                    'collection_type': ['test_split'],
                    'total_bytes': 1024,
                    'file_count': 10,
                    'path': '/data/test/',
                    'resources': [
                        {
                            'id': 'test001.csv',
                            'file_type': 'data_file',
                            'format': 'CSV',
                            'bytes': 102,
                            'encoding': 'UTF-8',
                            'sha256': 'test123',
                        }
                    ],
                }
            ],
            'total_file_count': 10,
            'total_size_bytes': 1024,
        }

        # D4D → RO-Crate → D4D
        rocrate_dict = self._to_rocrate_dict(original_d4d)
        recovered_d4d = self.rocrate_to_d4d.convert(rocrate_dict)

        # Verify preservation
        self.assertEqual(recovered_d4d['title'], original_d4d['title'])
        self.assertIn('file_collections', recovered_d4d)
        self.assertEqual(len(recovered_d4d['file_collections']), 1)

        recovered_collection = recovered_d4d['file_collections'][0]
        original_collection = original_d4d['file_collections'][0]

        # Check collection-level properties preserved
        self.assertEqual(recovered_collection['name'], original_collection['name'])
        self.assertEqual(recovered_collection.get('total_bytes'), original_collection['total_bytes'])
        self.assertEqual(recovered_collection['file_count'], original_collection['file_count'])
        self.assertEqual(recovered_collection['collection_type'], original_collection['collection_type'])
        # Note: File-level properties (format, encoding, sha256) are in resources, not on collection

    def test_backward_compatibility_no_filecollections(self):
        """Test that D4D without FileCollections still works."""
        legacy_d4d = {
            'id': 'legacy-dataset',
            'title': 'Legacy Dataset',
            'description': 'Dataset without file collections',
            'version': '1.0',
            'license': 'MIT',
            'bytes': 2048,
            'format': 'CSV',
            'md5': 'legacy123',
        }

        # Should convert without errors
        rocrate_dict = self._to_rocrate_dict(legacy_d4d)

        # Verify file properties at root level
        root_dataset = find_entity(rocrate_dict['@graph'], ROOT_DATASET_ID)
        self.assertIsNotNone(root_dataset)
        self.assertEqual(root_dataset['contentSize'], '2048')
        self.assertEqual(root_dataset['evi:md5'], 'legacy123')


if __name__ == '__main__':
    unittest.main()