Skip to content

Commit d43a045

Browse files
authored
Merge pull request #5049 from broadinstitute/dev
Dev
2 parents ec21bfd + 6fada88 commit d43a045

File tree

64 files changed

+738
-2621
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+738
-2621
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
# Maps tcp port 5432 on service container to the host
4242
- 5432:5432
4343
clickhouse:
44-
image: bitnami/clickhouse:latest
44+
image: bitnamilegacy/clickhouse:latest
4545
ports:
4646
- 9000:9000 # Native client interface
4747
volumes:

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
## dev
44

55
## 9/18/25
6-
# Deprecate hail backend support
6+
* Store gene ids on SavedVariant model (REQUIRES DB MIGRATION)
7+
8+
## 9/29/25
9+
* Deprecate hail backend support
710

811
## 8/7/25
912
* Deprecate support for lifting existing projects from GRCh37 to GRCh38

clickhouse_search/backend/functions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
class Array(Func):
88
function = 'array'
99

10+
def _resolve_output_field(self):
11+
output_field = super()._resolve_output_field()
12+
if not isinstance(output_field, ArrayField):
13+
output_field = ArrayField(base_field=output_field)
14+
return output_field
15+
1016

1117
class ArrayConcat(Func):
1218
function = 'arrayConcat'

clickhouse_search/fixtures/clickhouse_saved_variants.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@
213213
"pk": 101,
214214
"fields": {
215215
"key": 101,
216-
"variant_id": "suffix_19107_DEL"
216+
"variant_id": "suffix_19107_DEL_013746"
217217
}
218218
}, {
219219
"model": "clickhouse_search.annotationssv",
@@ -225,7 +225,7 @@
225225
"end_chrom": null,
226226
"pos": 249045487,
227227
"end": 249045898,
228-
"variant_id": "suffix_19107_DEL",
228+
"variant_id": "suffix_19107_DEL_013746",
229229
"lifted_over_chrom": null,
230230
"lifted_over_pos": null,
231231
"rg37_locus_end": [null, null],
@@ -250,7 +250,7 @@
250250
"pk": 111,
251251
"fields": {
252252
"key": 111,
253-
"variant_id": "prefix_70191_DEL"
253+
"variant_id": "prefix_19107_DEL"
254254
}
255255
}, {
256256
"model": "clickhouse_search.annotationsgcnv",
@@ -261,7 +261,7 @@
261261
"chrom": "1",
262262
"pos": 249045487,
263263
"end": 249045898,
264-
"variant_id": "prefix_70191_DEL",
264+
"variant_id": "prefix_19107_DEL",
265265
"lifted_over_chrom": "2",
266266
"lifted_over_pos": null,
267267
"rg37_locus_end": [null, null],

clickhouse_search/fixtures/clickhouse_search.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"populations": [
4949
[35805, 0.29499999, 121372, 0.41530353, 0, 24061, 5872],
5050
[72672, 0.28899795, 251462, 0.4116475, 0, 11567],
51-
[0, 0, 0, 0, 0, 0],
51+
[927, 0.03444932, 26912, 0.04027665, 0, 0],
5252
[65154, 0.246152, 264690, 47604, 8775]
5353
],
5454
"sorted_transcript_consequences": [
@@ -124,7 +124,7 @@
124124
],
125125
"sorted_transcript_consequences": [
126126
[null, null, ["splice_donor_variant"], true, null, "ENSG00000097046"],
127-
[null, 1, ["missense_variant"], false, null, "ENSG00000097046"],
127+
[0.1, 1, ["missense_variant"], false, null, "ENSG00000097046"],
128128
[null, null, ["missense_variant"], false, null, "ENSG00000097046"]
129129
],
130130
"sorted_motif_feature_consequences": [
@@ -681,7 +681,7 @@
681681
],
682682
"sign": 1,
683683
"calls": [
684-
["HG00733", 1, 99, 0.6530612111091614, 49],
684+
["HG00733", null, 0, null, null],
685685
["HG00731", 2, 48, 1, 16]
686686
]
687687
}

clickhouse_search/fixtures/clickhouse_transcripts.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
], null]
114114
],
115115
[
116-
[null], null, "protein_coding", 1, null, ["missense_variant"], [null, null], "ENSG00000097046",
116+
[0.1], null, "protein_coding", 1, null, ["missense_variant"], [null, null], "ENSG00000097046",
117117
"ENST00000350997.12:c.375+139G>A", null, [1, 11], [null, []], "missense_variant", null, "NM_013402.7",
118118
"NM_013402.7", [false], "ENST00000350997", 1, [null, null, null, [
119119
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null

clickhouse_search/management/commands/set_saved_variant_key.py

Lines changed: 2 additions & 2080 deletions
Large diffs are not rendered by default.

clickhouse_search/management/tests/set_saved_variant_key_tests.py

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def setUpTestData(cls):
2626
@mock.patch('seqr.utils.file_utils.subprocess.Popen')
2727
def test_command(self, mock_subprocess):
2828
mock_subprocess.return_value.stdout = self.MOCK_GCNV_DATA
29+
mock_subprocess.return_value.wait.return_value = 0
2930

3031
call_command('set_saved_variant_key')
3132
self.assert_json_logs(user=None, expected=[
@@ -39,21 +40,14 @@ def test_command(self, mock_subprocess):
3940
('Updated batch of 2', None),
4041
('Updated keys for 2 SNV_INDEL (GRCh38) variants', None),
4142
('Finding keys for 2 SV_WGS (GRCh38) variant ids', None),
42-
('Found 0 keys', None),
43-
('Finding keys for 2 SV_WES (GRCh38) variant ids', None),
44-
('Found 0 keys', None),
45-
('2 SV variants have no key, 0 of which have no search data, 0 of which are known to have dropped out of the callset.', None),
46-
('==> gsutil cat gs://seqr-datasets-gcnv/GRCh38/RDG_WES_Broad_Internal/v4/CMG_gCNV_2022_annotated.ensembl.round2_3.strvctvre.tsv.gz | gunzip -c -q - ', None),
47-
('Mapping reloaded SV_WES IDs to latest version', None),
48-
('Finding keys for 1 SV_WES (GRCh38) variant ids', None),
4943
('Found 1 keys', None),
5044
('Updated batch of 1', None),
51-
('Updated keys for 1 SV_WES (GRCh38) variants', None),
52-
('Mapping reloaded SV_WGS IDs to latest version', None),
53-
('Finding keys for 1 SV_WGS (GRCh38) variant ids', None),
45+
('Updated keys for 1 SV_WGS (GRCh38) variants', None),
46+
('No key found for 1 variants', None),
47+
('Finding keys for 1 SV_WES (GRCh38) variant ids', None),
5448
('Found 1 keys', None),
5549
('Updated batch of 1', None),
56-
('Updated keys for 1 SV_WGS (GRCh38) variants', None),
50+
('Updated keys for 1 SV_WES (GRCh38) variants', None),
5751
('Finding keys for 7 SNV_INDEL (GRCh37) variant ids', None),
5852
('Found 1 keys', None),
5953
('Updated batch of 1', None),
@@ -70,9 +64,9 @@ def test_command(self, mock_subprocess):
7064
{'guid': 'SV0000002_1248367227_r0390_100', 'key': 100, 'variant_id': '1-248367227-TC-T', 'dataset_type': 'SNV_INDEL', 'genotypes': {'I000004_hg00731': {'numAlt': 2}, 'I000005_hg00732': {'numAlt': 1}}, 'saved_variant_json': {}},
7165
{'guid': 'SV0000006_1248367227_r0003_tes', 'key': 100, 'variant_id': '1-248367227-TC-T', 'dataset_type': 'SNV_INDEL', 'genotypes': {'I000002_na19675': mock.ANY, 'I000017_na20889': mock.ANY}, 'saved_variant_json': {}},
7266
{'guid': 'SV0000006_1248367227_r0004_non', 'key': 100, 'variant_id': '1-248367227-TC-T', 'dataset_type': 'SNV_INDEL', 'genotypes': {'I000018_na21234': {'sampleId': 'NA20885', 'ab': 0.0, 'gq': 99.0, 'numAlt': 1}}, 'saved_variant_json': {}},
73-
{'guid': 'SV0000007_prefix_19107_DEL_r00', 'key': 111, 'variant_id': 'prefix_70191_DEL', 'dataset_type': 'SV_WES', 'genotypes': {'I000017_na20889': { 'cn': 1, 'sampleId': 'NA20885', 'numAlt': -1, 'defragged': False, 'qs': 33, 'numExon': 2}}, 'saved_variant_json': {}},
67+
{'guid': 'SV0000007_prefix_19107_DEL_r00', 'key': 111, 'variant_id': 'prefix_19107_DEL', 'dataset_type': 'SV_WES', 'genotypes': {'I000017_na20889': { 'cn': 1, 'sampleId': 'NA20885', 'numAlt': -1, 'defragged': False, 'qs': 33, 'numExon': 2}}, 'saved_variant_json': {}},
7468
{'guid': 'SV0000009_25000014783_r0004_no', 'key': 100, 'variant_id': 'M-14783-T-C', 'dataset_type': 'MITO', 'genotypes': {'I000018_na21234': mock.ANY}, 'saved_variant_json': {}},
75-
{'guid': 'SV0000013_prefix_19107_DEL_r00', 'key': 101, 'variant_id': 'suffix_19107_DEL', 'dataset_type': 'SV_WGS', 'genotypes': {'I000018_na21234': mock.ANY}, 'saved_variant_json': {}},
69+
{'guid': 'SV0000013_prefix_19107_DEL_r00', 'key': 101, 'variant_id': 'suffix_19107_DEL_013746', 'dataset_type': 'SV_WGS', 'genotypes': {'I000018_na21234': mock.ANY}, 'saved_variant_json': {}},
7670
{'guid': 'SV0027166_191912634_r0384_rare', 'key': None, 'variant_id': '19-1912634-C-T', 'dataset_type': None, 'genotypes': mock.ANY, 'saved_variant_json': mock.ANY},
7771
{'guid': 'SV0027167_191912633_r0384_rare', 'key': None, 'variant_id': '19-1912633-G-T', 'dataset_type': None, 'genotypes': mock.ANY, 'saved_variant_json': mock.ANY},
7872
{'guid': 'SV0027168_191912632_r0384_rare', 'key': None, 'variant_id': '19-1912632-G-C', 'dataset_type': None, 'genotypes': mock.ANY, 'saved_variant_json': mock.ANY},
@@ -106,6 +100,7 @@ class SetSavedVariantKeyFailedMappingTest(SetSavedVariantKeyTest):
106100
@mock.patch('seqr.utils.file_utils.subprocess.Popen')
107101
def test_command(self, mock_subprocess):
108102
mock_subprocess.return_value.stdout = self.MOCK_GCNV_DATA
103+
mock_subprocess.return_value.wait.return_value = 0
109104

110105
call_command('set_saved_variant_key')
111106
self.assert_json_logs(user=None, expected=[
@@ -121,12 +116,8 @@ def test_command(self, mock_subprocess):
121116
('Finding keys for 2 SV_WES (GRCh38) variant ids', None),
122117
('Found 0 keys', None),
123118
('2 SV variants have no key, 0 of which have no search data, 0 of which are known to have dropped out of the callset.', None),
124-
('==> gsutil cat gs://seqr-datasets-gcnv/GRCh38/RDG_WES_Broad_Internal/v4/CMG_gCNV_2022_annotated.ensembl.round2_3.strvctvre.tsv.gz | gunzip -c -q - ', None),
125-
('Mapping reloaded SV_WGS IDs to latest version', None),
126-
('Finding keys for 1 SV_WGS (GRCh38) variant ids', None),
127-
('Found 0 keys', None),
128-
("1 variants failed ID mapping: ['suffix_19107_DEL']", None),
129119
('1 remaining SV WES variants prefix_19107_DEL - 12', None),
120+
('1 remaining SV WGS variants suffix_19107_DEL_013746 - fam14', None),
130121
('Finding keys for 7 SNV_INDEL (GRCh37) variant ids', None),
131122
('Found 0 keys', None),
132123
('7 variants have no key, 0 of which have no search data, 7 of which are absent from the hail backend.', None),

0 commit comments

Comments
 (0)