Skip to content

Commit 64d6cb2

Browse files
Merge pull request #55 from AustralianBioCommons/fix-data-type-required
Fix data type required
2 parents 7d19af0 + e24ae66 commit 64d6cb2

File tree

6 files changed

+138
-14
lines changed

6 files changed

+138
-14
lines changed

jupyter/protoyping.ipynb

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"id": "26ac381a",
77
"metadata": {},
88
"outputs": [],
@@ -12,14 +12,27 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": null,
15+
"execution_count": 2,
1616
"id": "7bda6e32",
1717
"metadata": {},
18-
"outputs": [],
18+
"outputs": [
19+
{
20+
"ename": "ValueError",
21+
"evalue": "Schema 'lipidomics_file' with category 'data_file' must include properties 'data_type', 'data_format', and 'data_category'. Please add these properties to the 'properties' section.",
22+
"output_type": "error",
23+
"traceback": [
24+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
25+
"\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
26+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m lipid_schema = g.utils.load_yaml(\u001b[33m\"\u001b[39m\u001b[33m../output/lipidomics_file.yaml\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2\u001b[39m rule_val = g.validators.rule_validator.RuleValidator(lipid_schema)\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[43mrule_val\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdata_file_props_need_data_props\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
27+
"\u001b[36mFile \u001b[39m\u001b[32m~/projects/gen3schemadev/src/gen3schemadev/validators/rule_validator.py:208\u001b[39m, in \u001b[36mRuleValidator.data_file_props_need_data_props\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 206\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m need_props.issubset(prop_keys):\n\u001b[32m 207\u001b[39m schema_id = \u001b[38;5;28mself\u001b[39m.schema.get(\u001b[33m\"\u001b[39m\u001b[33mid\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m<unknown id>\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m208\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 209\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mSchema \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mschema_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m with category \u001b[39m\u001b[33m'\u001b[39m\u001b[33mdata_file\u001b[39m\u001b[33m'\u001b[39m\u001b[33m must include properties \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m'\u001b[39m\u001b[33mdata_type\u001b[39m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m\u001b[33mdata_format\u001b[39m\u001b[33m'\u001b[39m\u001b[33m, and \u001b[39m\u001b[33m'\u001b[39m\u001b[33mdata_category\u001b[39m\u001b[33m'\u001b[39m\u001b[33m. Please add these properties \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 211\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mto the \u001b[39m\u001b[33m'\u001b[39m\u001b[33mproperties\u001b[39m\u001b[33m'\u001b[39m\u001b[33m section.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 212\u001b[39m )\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
28+
"\u001b[31mValueError\u001b[39m: Schema 'lipidomics_file' with category 'data_file' must include properties 'data_type', 'data_format', and 'data_category'. Please add these properties to the 'properties' section."
29+
]
30+
}
31+
],
1932
"source": [
20-
"lipid_schema = g.utils.load_yaml(\"../tests/gen3_schema/testing/yaml_pass/demographic.yaml\")\n",
33+
"lipid_schema = g.utils.load_yaml(\"../output/lipidomics_file.yaml\")\n",
2134
"rule_val = g.validators.rule_validator.RuleValidator(lipid_schema)\n",
22-
"rule_val.validate()"
35+
"rule_val.data_file_props_need_data_props()"
2336
]
2437
},
2538
{

src/gen3schemadev/converter.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,19 @@ def construct_props(node_name: str, data: DataSourceProtocol) -> dict:
566566
props_dict['core_metadata_collections'] = {"$ref": "_definitions.yaml#/to_one"}
567567
props_dict['$ref'] = "_definitions.yaml#/data_file_properties"
568568

569+
props_dict['data_category'] = {
570+
"description": "Broad categorization of the contents of the data file.",
571+
"enum": ['data_category_1', 'data_category_2', 'data_category_3']
572+
}
573+
props_dict['data_format'] = {
574+
"description": "The format of the data in this data file",
575+
"enum": ['data_format_1', 'data_format_2', 'data_format_3']
576+
}
577+
props_dict['data_type'] = {
578+
"description": "The type of data in this data file",
579+
"enum": ['data_type_1', 'data_type_2', 'data_type_3']
580+
}
581+
569582
return props_dict
570583

571584

src/gen3schemadev/validators/rule_validator.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,24 @@ def props_must_have_type(self):
190190
raise ValueError(
191191
f"Property '{key}' must have a value for 'type' or 'enum' in schema '{schema_id}'."
192192
)
193-
return True
193+
return True
194+
195+
def data_file_props_need_data_props(self):
    """Check that a ``data_file`` schema declares its core data properties.

    Schemas whose ``category`` is anything other than ``data_file``
    (including schemas with no ``category`` key at all) are exempt and
    pass unconditionally. A ``data_file`` schema must list ``data_type``,
    ``data_format`` and ``data_category`` among the properties returned
    by ``self._get_props()``.

    Returns:
        True when the schema is exempt or declares all three properties.

    Raises:
        ValueError: If the schema has category ``data_file`` and at least
            one of the three properties is absent.
    """
    # A missing ``category`` falls back to "" and is therefore exempt.
    if self.schema.get("category", "") != "data_file":
        return True

    props = self._get_props()
    need_props = {"data_type", "data_format", "data_category"}
    if need_props.issubset(props.keys()):
        return True

    schema_id = self.schema.get("id", "<unknown id>")
    # The message always names all three properties, whichever are missing.
    raise ValueError(
        f"Schema '{schema_id}' with category 'data_file' must include properties "
        "'data_type', 'data_format', and 'data_category'. Please add these properties "
        "to the 'properties' section."
    )

tests/gen3_schema/testing/yaml_fail/lipidomics_file.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@ properties:
5555
- tsv
5656
- xls
5757
- xlsx
58-
data_category:
59-
description: Broad categorization of the contents of the data file.
60-
enum:
61-
- mass spec raw
62-
- mass spec analysed
63-
- summarised results
6458
cv:
6559
description: Coefficient of variation (%)
6660
type: number

tests/test_converter.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,19 @@ def test_construct_prop_lipidomics_file(fixture_input_yaml_pass):
452452
"assays": {"$ref": "_definitions.yaml#/to_one"},
453453
"core_metadata_collections": {"$ref": "_definitions.yaml#/to_one"},
454454
'cv': {'description': 'Coefficient of variation (%)', 'type': 'number'}
455+
,
456+
"data_category": {
457+
"description": "Broad categorization of the contents of the data file.",
458+
"enum": ['data_category_1', 'data_category_2', 'data_category_3']
459+
},
460+
"data_format": {
461+
"description": "The format of the data in this data file",
462+
"enum": ['data_format_1', 'data_format_2', 'data_format_3']
463+
},
464+
"data_type": {
465+
"description": "The type of data in this data file",
466+
"enum": ['data_type_1', 'data_type_2', 'data_type_3']
467+
}
455468

456469
}
457470
assert result == expected
@@ -589,7 +602,19 @@ def fixture_expected_output_lipid():
589602
'samples': {'$ref': '_definitions.yaml#/to_one'},
590603
'assays': {'$ref': '_definitions.yaml#/to_one'},
591604
'core_metadata_collections': {'$ref': '_definitions.yaml#/to_one'},
592-
'cv': {'description': 'Coefficient of variation (%)', 'type': 'number'}
605+
'cv': {'description': 'Coefficient of variation (%)', 'type': 'number'},
606+
"data_category": {
607+
"description": "Broad categorization of the contents of the data file.",
608+
"enum": ['data_category_1', 'data_category_2', 'data_category_3']
609+
},
610+
"data_format": {
611+
"description": "The format of the data in this data file",
612+
"enum": ['data_format_1', 'data_format_2', 'data_format_3']
613+
},
614+
"data_type": {
615+
"description": "The type of data in this data file",
616+
"enum": ['data_type_1', 'data_type_2', 'data_type_3']
617+
}
593618
},
594619
'additionalProperties': False
595620
}

tests/test_rule_validator.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_get_props(fixture_rule_validator_pass, fixture_rule_validator_fail):
4949
assert "samples" in props
5050

5151
props_fail = fixture_rule_validator_fail._get_props()
52-
assert len(props_fail) == 7
52+
assert len(props_fail) == 6
5353
assert "type" in props_fail
5454

5555

@@ -107,3 +107,62 @@ def test_props_must_have_type_fail():
107107
with pytest.raises(ValueError) as excinfo:
108108
rule_validator.props_must_have_type()
109109
assert "must have a value for 'type' or 'enum'" in str(excinfo.value)
110+
111+
112+
def test_data_file_props_need_data_props():
    """Exercise ``RuleValidator.data_file_props_need_data_props``.

    Covers four scenarios: a complete ``data_file`` schema, a schema of a
    different category, a ``data_file`` schema missing one required
    property, and a ``data_file`` schema missing all of them.
    """
    # Passing: a data_file schema that declares all three required properties.
    schema = {
        "id": "my_data_file",
        "category": "data_file",
        "properties": {
            "data_type": {"type": "string"},
            "data_format": {"type": "string"},
            "data_category": {"type": "string"},
            "other": {"type": "string"},
        },
    }
    rv = RuleValidator(schema)
    assert rv.data_file_props_need_data_props() is True

    # Not a data_file node: should pass (returns True, does not care about props)
    non_data_file_schema = {
        "id": "my_non_data_file",
        "category": "project",
        "properties": {
            "some_field": {"type": "string"},
        },
    }
    rv2 = RuleValidator(non_data_file_schema)
    assert rv2.data_file_props_need_data_props() is True

    # Failing: missing required property
    missing_data_type = {
        "id": "bad_data_file",
        "category": "data_file",
        "properties": {
            "data_format": {"type": "string"},
            "data_category": {"type": "string"},
            # missing 'data_type'
        },
    }
    rv3 = RuleValidator(missing_data_type)
    with pytest.raises(ValueError) as excinfo:
        rv3.data_file_props_need_data_props()
    assert "must include properties" in str(excinfo.value)
    assert "data_type" in str(excinfo.value)

    # Failing: missing all required props
    missing_all = {
        "id": "bad_data_file2",
        "category": "data_file",
        "properties": {
            "foo": {"type": "string"},
        },
    }
    rv4 = RuleValidator(missing_all)
    with pytest.raises(ValueError) as excinfo2:
        rv4.data_file_props_need_data_props()
    assert "must include properties" in str(excinfo2.value)
    assert "data_type" in str(excinfo2.value)
    assert "data_format" in str(excinfo2.value)
    assert "data_category" in str(excinfo2.value)
168+

0 commit comments

Comments
 (0)