diff --git a/pydantic_models/expression_input.py b/pydantic_models/expression_input.py
new file mode 100644
index 0000000..c6fe0b5
--- /dev/null
+++ b/pydantic_models/expression_input.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+"""Generating json schema for expression data via pydantic models."""
+
+from __future__ import annotations
+
+from enum import Enum
+import json
+from pathlib import Path
+from typing import List, Optional
+
+from pydantic import BaseModel, Extra, Field
+
+
+class _StrictModel(BaseModel):
+    """Base model that forbids unknown fields and strips string whitespace."""
+
+    class Config:
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+
+class ExpressionAggregated(_StrictModel):
+    """Expression object for aggregated data."""
+
+    assayGroupId: str = Field(
+        description="Identifier for the assay group.",
+    )
+    min: float = Field(
+        description='Minimum value in the assay group.',
+    )
+    q1: float = Field(
+        description='First quartile of values in the assay group.',
+    )
+    q2: float = Field(
+        description='Median of values in the assay group.',
+    )
+    q3: float = Field(
+        description='Third quartile of values in the assay group.',
+    )
+    max: float = Field(
+        description='Maximum expression value in the assay group.',
+    )
+
+
+class ExpressionAggregatedSchema(_StrictModel):
+    """Schema for aggregated expression data."""
+
+    geneProductId: str = Field(
+        description="Identifier of measured gene product, protein or transcript.",
+        examples=['ENSG00000157764', 'Q9HC10'],
+    )
+    unit: str = Field(
+        description='Unit of the expression value.',
+        examples=['tpms'],
+    )
+    expression: List[ExpressionAggregated]
+
+    class Config:
+        # extra/anystr_strip_whitespace are inherited from _StrictModel.Config.
+        title = 'OpenTargets-gene-expression-aggregated'
+
+
+class ExperimentalDesign(_StrictModel):
+    """Experimental design object."""
+
+    assayGroupId: str = Field(
+        description="Identifier for the assay group.",
+    )
+    assayId: str = Field(
+        description="Identifier for the assay.",
+    )
+    assayGroup: str = Field(
+        description='Group of the assay.',
+    )
+    age: str = Field(
+        description='Age of the organism.',
+    )
+    cultivar: str = Field(
+        description='Cultivar name.',
+    )
+    genotype: str = Field(
+        description='Genotype of the organism.',
+    )
+    organismPart: str = Field(
+        description='Part of the organism.',
+    )
+
+
+class ExpressionUnAggregated(_StrictModel):
+    """Expression object for unaggregated data."""
+
+    assayId: str = Field(
+        description="Identifier for the assay.",
+    )
+    value: float = Field(
+        description='Expression value in the assay.',
+    )
+
+
+class ExpressionUnaggregatedSchema(_StrictModel):
+    """Schema for unaggregated expression data."""
+
+    geneProductId: str = Field(
+        description="Identifier of measured gene product, protein or transcript.",
+        examples=['ENSG00000157764', 'Q9HC10'],
+    )
+    unit: str = Field(
+        description='Unit of the expression value.',
+        examples=['tpms'],
+    )
+    expression: List[ExpressionUnAggregated]
+
+    class Config:
+        title = 'OpenTargets-gene-expression-unaggregated'
+
+
+class StudyMetadataSchema(_StrictModel):
+    """Schema for expression metadata."""
+
+    experimentId: str = Field(
+        description="Identifier for the experiment.",
+    )
+    experimentType: str = Field(
+        description='Type of the experiment.',
+    )
+    species: str = Field(
+        description='Species name.',
+        examples=['Sorghum bicolor'],
+    )
+    speciesOntURI: str = Field(
+        description='Species ontology URI.',
+        examples=['http://purl.obolibrary.org/obo/NCBITaxon_4558'],
+    )
+    pubmedIds: List[str] = Field(
+        description='List of pubmed identifiers.',
+        # Each entry of "examples" is one complete value of the field, and the
+        # field is a list, so the example itself must be a list.
+        examples=[['28186631']],
+    )
+    provider: str = Field(
+        description='Provider of the data.',
+    )
+    experimentalDesigns: List[ExperimentalDesign]
+
+    class Config:
+        # Strictness is inherited from _StrictModel so unknown top-level keys
+        # are rejected here too, matching the two expression schemas.
+        title = 'OpenTargets-gene-expression-study-metadata'
+
+
+def main(output_dir: str = '.') -> None:
+    """Write the JSON schema of every top-level model to ``output_dir``.
+
+    Args:
+        output_dir: Directory that receives the three schema files. Defaults
+            to the current working directory.
+    """
+    models = {
+        'expression_aggregated.json': ExpressionAggregatedSchema,
+        'expression_unaggregated.json': ExpressionUnaggregatedSchema,
+        'expression_study_metadata.json': StudyMetadataSchema,
+    }
+    target = Path(output_dir)
+    for file_name, model in models.items():
+        # schema_json() returns a str; write it without a trailing newline.
+        (target / file_name).write_text(
+            model.schema_json(indent=2), encoding='utf-8'
+        )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/schemas/expression_aggregated.json b/schemas/expression_aggregated.json
new file mode 100644
index 0000000..77f0a4e
--- /dev/null
+++ b/schemas/expression_aggregated.json
@@ -0,0 +1,85 @@
+{
+  "title": "OpenTargets-gene-expression-aggregated",
+  "description": "Schema for aggregated expression data.",
+  "type": "object",
+  "properties": {
+    "geneProductId": {
+      "title": "Geneproductid",
+      "description": "Identifier of measured gene product, protein or transcript.",
+      "examples": [
+        "ENSG00000157764",
+        "Q9HC10"
+      ],
+      "type": "string"
+    },
+    "unit": {
+      "title": "Unit",
+      "description": "Unit of the expression value.",
+      "examples": [
+        "tpms"
+      ],
+      "type": "string"
+    },
+    "expression": {
+      "title": "Expression",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExpressionAggregated"
+      }
+    }
+  },
+  "required": [
+    "geneProductId",
+    "unit",
+    "expression"
+  ],
+  "additionalProperties": false,
+  "definitions": {
+    "ExpressionAggregated": {
+      "title": "ExpressionAggregated",
+      "description": "Expression object for aggregated data.",
+      "type": "object",
+      "properties": {
+        "assayGroupId": {
+          "title": "Assaygroupid",
+          "description": "Identifier for the assay group.",
+          "type": "string"
+        },
+        "min": {
+          "title": "Min",
+          "description": "Minimum value in the assay group.",
+          "type": "number"
+        },
+        "q1": {
+          "title": "Q1",
+          "description": "First quartile of values in the assay group.",
+          "type": "number"
+        },
+        "q2": {
+          "title": "Q2",
+          "description": "Median of values in the assay group.",
+          "type": "number"
+        },
+        "q3": {
+          "title": "Q3",
+          "description": "Third quartile of values in the assay group.",
+          "type": "number"
+        },
+        "max": {
+          "title": "Max",
+          "description": "Maximum expression value in the assay group.",
+          "type": "number"
+        }
+      },
+      "required": [
+        "assayGroupId",
+        "min",
+        "q1",
+        "q2",
+        "q3",
+        "max"
+      ],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/schemas/expression_study_metadata.json b/schemas/expression_study_metadata.json
new file mode 100644
index 0000000..4a8e600
--- /dev/null
+++ b/schemas/expression_study_metadata.json
@@ -0,0 +1,122 @@
+{
+  "title": "OpenTargets-gene-expression-study-metadata",
+  "description": "Schema for expression metadata.",
+  "type": "object",
+  "properties": {
+    "experimentId": {
+      "title": "Experimentid",
+      "description": "Identifier for the experiment.",
+      "type": "string"
+    },
+    "experimentType": {
+      "title": "Experimenttype",
+      "description": "Type of the experiment.",
+      "type": "string"
+    },
+    "species": {
+      "title": "Species",
+      "description": "Species name.",
+      "examples": [
+        "Sorghum bicolor"
+      ],
+      "type": "string"
+    },
+    "speciesOntURI": {
+      "title": "Speciesonturi",
+      "description": "Species ontology URI.",
+      "examples": [
+        "http://purl.obolibrary.org/obo/NCBITaxon_4558"
+      ],
+      "type": "string"
+    },
+    "pubmedIds": {
+      "title": "Pubmedids",
+      "description": "List of pubmed identifiers.",
+      "examples": [
+        [
+          "28186631"
+        ]
+      ],
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "provider": {
+      "title": "Provider",
+      "description": "Provider of the data.",
+      "type": "string"
+    },
+    "experimentalDesigns": {
+      "title": "Experimentaldesigns",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExperimentalDesign"
+      }
+    }
+  },
+  "required": [
+    "experimentId",
+    "experimentType",
+    "species",
+    "speciesOntURI",
+    "pubmedIds",
+    "provider",
+    "experimentalDesigns"
+  ],
+  "additionalProperties": false,
+  "definitions": {
+    "ExperimentalDesign": {
+      "title": "ExperimentalDesign",
+      "description": "Experimental design object.",
+      "type": "object",
+      "properties": {
+        "assayGroupId": {
+          "title": "Assaygroupid",
+          "description": "Identifier for the assay group.",
+          "type": "string"
+        },
+        "assayId": {
+          "title": "Assayid",
+          "description": "Identifier for the assay.",
+          "type": "string"
+        },
+        "assayGroup": {
+          "title": "Assaygroup",
+          "description": "Group of the assay.",
+          "type": "string"
+        },
+        "age": {
+          "title": "Age",
+          "description": "Age of the organism.",
+          "type": "string"
+        },
+        "cultivar": {
+          "title": "Cultivar",
+          "description": "Cultivar name.",
+          "type": "string"
+        },
+        "genotype": {
+          "title": "Genotype",
+          "description": "Genotype of the organism.",
+          "type": "string"
+        },
+        "organismPart": {
+          "title": "Organismpart",
+          "description": "Part of the organism.",
+          "type": "string"
+        }
+      },
+      "required": [
+        "assayGroupId",
+        "assayId",
+        "assayGroup",
+        "age",
+        "cultivar",
+        "genotype",
+        "organismPart"
+      ],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/schemas/expression_unaggregated.json b/schemas/expression_unaggregated.json
new file mode 100644
index 0000000..313909e
--- /dev/null
+++ b/schemas/expression_unaggregated.json
@@ -0,0 +1,61 @@
+{
+  "title": "OpenTargets-gene-expression-unaggregated",
+  "description": "Schema for unaggregated expression data.",
+  "type": "object",
+  "properties": {
+    "geneProductId": {
+      "title": "Geneproductid",
+      "description": "Identifier of measured gene product, protein or transcript.",
+      "examples": [
+        "ENSG00000157764",
+        "Q9HC10"
+      ],
+      "type": "string"
+    },
+    "unit": {
+      "title": "Unit",
+      "description": "Unit of the expression value.",
+      "examples": [
+        "tpms"
+      ],
+      "type": "string"
+    },
+    "expression": {
+      "title": "Expression",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExpressionUnAggregated"
+      }
+    }
+  },
+  "required": [
+    "geneProductId",
+    "unit",
+    "expression"
+  ],
+  "additionalProperties": false,
+  "definitions": {
+    "ExpressionUnAggregated": {
+      "title": "ExpressionUnAggregated",
+      "description": "Expression object for unaggregated data.",
+      "type": "object",
+      "properties": {
+        "assayId": {
+          "title": "Assayid",
+          "description": "Identifier for the assay.",
+          "type": "string"
+        },
+        "value": {
+          "title": "Value",
+          "description": "Expression value in the assay.",
+          "type": "number"
+        }
+      },
+      "required": [
+        "assayId",
+        "value"
+      ],
+      "additionalProperties": false
+    }
+  }
+}