Skip to content

Commit 6f65b0a

Browse files
authored
Use mutation_derived table in mutation mapper to avoid joins (#11766)
1 parent ba502c6 commit 6f65b0a

File tree

1 file changed: +85 −107 lines changed

src/main/resources/org/cbioportal/legacy/persistence/mybatis/MutationMapper.xml

Lines changed: 85 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -4,49 +4,49 @@
44
<mapper namespace="org.cbioportal.legacy.persistence.mybatis.MutationMapper">
55

66
<sql id="select"> <!-- Shared SELECT column list. Every projection returns molecularProfileId, sampleId, patientId, entrezGeneId and studyId; SUMMARY and DETAILED append the mutation detail and driver annotation columns; DETAILED further appends the GENE.* columns and the getAlleleSpecificCopyNumber fragment. The new lines select columns by their camelCase names without AS aliases. -->
7-
genetic_profile.stable_id AS "molecularProfileId",
8-
sample.stable_id AS "sampleId",
9-
patient.stable_id AS "patientId",
10-
mutation.entrez_gene_id AS "entrezGeneId",
11-
cancer_study.cancer_study_identifier AS "studyId"
7+
molecularProfileId,
8+
sampleId,
9+
patientId,
10+
entrezGeneId,
11+
studyId
1212
<if test="projection == 'SUMMARY' || projection == 'DETAILED'">
1313
,
14-
mutation.center AS "center",
15-
mutation.mutation_status AS "mutationStatus",
16-
mutation.validation_status AS "validationStatus",
17-
mutation.tumor_alt_count AS "tumorAltCount",
18-
mutation.tumor_ref_count AS "tumorRefCount",
19-
mutation.normal_alt_count AS "normalAltCount",
20-
mutation.normal_ref_count AS "normalRefCount",
21-
mutation.amino_acid_change AS "aminoAcidChange",
22-
mutation_event.chr AS "chr",
23-
mutation_event.start_position AS "startPosition",
24-
mutation_event.end_position AS "endPosition",
25-
mutation_event.reference_allele AS "referenceAllele",
26-
mutation_event.tumor_seq_allele AS "tumorSeqAllele",
27-
mutation_event.protein_change AS "proteinChange",
28-
mutation_event.mutation_type AS "mutationType",
29-
mutation_event.ncbi_build AS "ncbiBuild",
30-
mutation_event.variant_type AS "variantType",
31-
mutation_event.refseq_mrna_id AS "refseqMrnaId",
32-
mutation_event.protein_pos_start AS "proteinPosStart",
33-
mutation_event.protein_pos_end AS "proteinPosEnd",
34-
mutation_event.keyword AS "keyword",
35-
mutation.annotation_json AS "annotationJSON",
36-
alteration_driver_annotation.driver_filter AS "driverFilter",
37-
alteration_driver_annotation.driver_filter_annotation AS "driverFilterAnnotation",
38-
alteration_driver_annotation.driver_tiers_filter AS "driverTiersFilter",
39-
alteration_driver_annotation.driver_tiers_filter_annotation as "driverTiersFilterAnnotation"
14+
center,
15+
mutationStatus,
16+
validationStatus,
17+
tumorAltCount,
18+
tumorRefCount,
19+
normalAltCount,
20+
normalRefCount,
21+
aminoAcidChange,
22+
chr,
23+
startPosition,
24+
endPosition,
25+
referenceAllele,
26+
tumorSeqAllele,
27+
proteinChange,
28+
mutationType,
29+
ncbiBuild,
30+
variantType,
31+
refseqMrnaId,
32+
proteinPosStart,
33+
proteinPosEnd,
34+
keyword,
35+
annotationJSON,
36+
driverFilter,
37+
driverFilterAnnotation,
38+
driverTiersFilter,
39+
driverTiersFilterAnnotation
4040
</if>
4141
<if test="projection == 'DETAILED'"> <!-- NOTE(review): the include below still passes a prefix property, but the new getAlleleSpecificCopyNumber column list no longer references it; confirm whether it can be dropped. -->
42-
,
43-
<include refid="org.cbioportal.legacy.persistence.mybatis.GeneMapper.select">
44-
<property name="prefix" value="GENE."/>
45-
</include>
46-
,
42+
,
43+
GENE.entrezGeneId,
44+
GENE.hugoGeneSymbol,
45+
GENE.type,
4746
<include refid="getAlleleSpecificCopyNumber">
48-
<property name="prefix" value="alleleSpecificCopyNumber."/>
49-
</include>
47+
<property name="prefix" value="alleleSpecificCopyNumber."/>
48+
</include>
49+
5050
</if>
5151
</sql>
5252

@@ -55,70 +55,62 @@
5555
ORDER BY "${sortBy}" ${direction}
5656
</if>
5757
<if test="projection == 'ID'">
58-
ORDER BY genetic_profile.stable_id ASC, sample.stable_id ASC, mutation.entrez_gene_id ASC
58+
ORDER BY molecularProfileId ASC, sampleId ASC, entrezGeneId ASC
5959
</if>
6060
<if test="limit != null and limit != 0">
6161
LIMIT #{limit} OFFSET #{offset}
6262
</if>
6363
</sql>
6464

6565
<sql id="from"> <!-- Shared FROM clause. The new version reads only from mutation_derived; the joins to genetic_profile, sample, patient, cancer_study and alteration_driver_annotation are removed, so statements including this fragment must refer to mutation_derived column names. -->
66-
FROM mutation
67-
INNER JOIN genetic_profile ON mutation.genetic_profile_id = genetic_profile.genetic_profile_id
68-
INNER JOIN sample ON mutation.sample_id = sample.internal_id
69-
INNER JOIN patient ON sample.patient_id = patient.internal_id
70-
INNER JOIN cancer_study ON patient.cancer_study_id = cancer_study.cancer_study_id
71-
LEFT JOIN alteration_driver_annotation ON
72-
mutation.genetic_profile_id = alteration_driver_annotation.genetic_profile_id
73-
AND mutation.sample_id = alteration_driver_annotation.sample_id
74-
AND mutation.mutation_event_id = alteration_driver_annotation.alteration_event_id
66+
FROM mutation_derived
7567
</sql>
7668

7769
<sql id="where"> <!-- WHERE clause for a single molecular profile: always matches molecularProfileId, then optionally narrows by sampleIds, entrezGeneIds, gene queries (whereWithGeneQueries) and, when snpOnly is true, to rows whose reference and tumor alleles are each one of A, T, C, G. -->
7870
<where>
79-
genetic_profile.stable_id = #{molecularProfileId}
71+
molecularProfileId = #{molecularProfileId}
8072
<if test="sampleIds != null and !sampleIds.isEmpty()">
81-
AND sample.stable_id IN
73+
AND sampleId IN
8274
<foreach item="item" collection="sampleIds" open="(" separator="," close=")">#{item}</foreach>
8375
</if>
8476
<if test="_parameter.containsKey('entrezGeneIds') and entrezGeneIds != null and !entrezGeneIds.isEmpty()"> <!-- The containsKey guard means entrezGeneIds is only evaluated when the parameter map actually carries that key. -->
85-
AND mutation.entrez_gene_id IN
77+
AND entrezGeneId IN
8678
<foreach item="item" collection="entrezGeneIds" open="(" separator="," close=")">#{item}</foreach>
8779
</if>
8880
<if test="_parameter.containsKey('geneQueries') and geneQueries != null and !geneQueries.isEmpty()">
8981
<include refid="whereWithGeneQueries"/>
9082
</if>
9183
<if test="snpOnly == true">
92-
AND mutation_event.reference_allele IN ('A','T','C','G')
93-
AND mutation_event.tumor_seq_allele IN ('A','T','C','G')
84+
AND referenceAllele IN ('A','T','C','G')
85+
AND tumorSeqAllele IN ('A','T','C','G')
9486
</if>
9587
</where>
9688
</sql>
9789

9890
<sql id="whereBySampleListId"> <!-- WHERE clause for one molecular profile restricted to the members of a sample list: sampleInternalId must appear in sample_list_list for the list whose stable_id equals sampleListId; optional entrezGeneIds and snpOnly filters follow. -->
9991
<where>
100-
genetic_profile.stable_id = #{molecularProfileId}
101-
AND mutation.sample_id IN
92+
molecularProfileId = #{molecularProfileId}
93+
AND sampleInternalId IN
10294
(
10395
SELECT sample_list_list.sample_id FROM sample_list_list
10496
INNER JOIN sample_list ON sample_list_list.list_id = sample_list.list_id
10597
WHERE sample_list.stable_id = #{sampleListId}
10698
)
10799
<if test="entrezGeneIds != null and !entrezGeneIds.isEmpty()">
108-
AND mutation.entrez_gene_id IN
100+
AND entrezGeneId IN
109101
<foreach item="item" collection="entrezGeneIds" open="(" separator="," close=")">#{item}</foreach>
110102
</if>
111103
<if test="snpOnly == true"> <!-- Keeps only rows where both alleles are a single base A, T, C or G. -->
112-
AND mutation_event.reference_allele IN ('A','T','C','G')
113-
AND mutation_event.tumor_seq_allele IN ('A','T','C','G')
104+
AND referenceAllele IN ('A','T','C','G')
105+
AND tumorSeqAllele IN ('A','T','C','G')
114106
</if>
115107
</where>
116108
</sql>
117109

118110
<sql id="whereInMultipleMolecularProfiles">
119111
<where>
120112
<if test="sampleIds != null and !sampleIds.isEmpty()">
121-
mutation.sample_id IN (
113+
sample_id IN (
122114
SELECT sample.internal_id FROM sample
123115
INNER JOIN patient ON sample.patient_id = patient.internal_id
124116
INNER JOIN genetic_profile ON patient.cancer_study_id = genetic_profile.cancer_study_id
@@ -143,19 +135,19 @@
143135
)
144136
</if>
145137
<if test="sampleIds == null || sampleIds.isEmpty()">
146-
genetic_profile.stable_id IN
138+
molecularProfileId IN
147139
<foreach item="item" collection="molecularProfileIds" open="(" separator="," close=")">#{item}</foreach>
148140
</if>
149141
<if test="_parameter.containsKey('entrezGeneIds') and entrezGeneIds != null and !entrezGeneIds.isEmpty()">
150-
AND mutation.entrez_gene_id IN
142+
AND entrezGeneId IN
151143
<foreach item="item" collection="entrezGeneIds" open="(" separator="," close=")">#{item}</foreach>
152144
</if>
153145
<if test="_parameter.containsKey('geneQueries') and geneQueries != null and !geneQueries.isEmpty()">
154146
<include refid="whereWithGeneQueries"/>
155147
</if>
156148
<if test="snpOnly == true">
157-
AND mutation_event.reference_allele IN ('A','T','C','G')
158-
AND mutation_event.tumor_seq_allele IN ('A','T','C','G')
149+
AND referenceAllele IN ('A','T','C','G')
150+
AND tumorSeqAllele IN ('A','T','C','G')
159151
</if>
160152
</where>
161153
</sql>
@@ -245,20 +237,20 @@
245237
</sql>
246238

247239
<sql id="getAlleleSpecificCopyNumber"> <!-- Allele-specific copy number column list, included by the select fragment for the DETAILED projection. NOTE(review): the new lines hard-code the alleleSpecificCopyNumber. qualifier and carry no AS aliases, so the prefix property passed by the include is no longer used; confirm that the queried table exposes columns under exactly these names. -->
248-
allele_specific_copy_number.ascn_integer_copy_number AS "${prefix}ascnIntegerCopyNumber",
249-
allele_specific_copy_number.ascn_method AS "${prefix}ascnMethod",
250-
allele_specific_copy_number.ccf_expected_copies_upper AS "${prefix}ccfExpectedCopiesUpper",
251-
allele_specific_copy_number.ccf_expected_copies AS "${prefix}ccfExpectedCopies",
252-
allele_specific_copy_number.clonal AS "${prefix}clonal",
253-
allele_specific_copy_number.minor_copy_number AS "${prefix}minorCopyNumber",
254-
allele_specific_copy_number.expected_alt_copies AS "${prefix}expectedAltCopies",
255-
allele_specific_copy_number.total_copy_number AS "${prefix}totalCopyNumber"
240+
alleleSpecificCopyNumber.ascnIntegerCopyNumber,
241+
alleleSpecificCopyNumber.ascnMethod,
242+
alleleSpecificCopyNumber.ccfExpectedCopiesUpper,
243+
alleleSpecificCopyNumber.ccfExpectedCopies,
244+
alleleSpecificCopyNumber.clonal,
245+
alleleSpecificCopyNumber.minorCopyNumber,
246+
alleleSpecificCopyNumber.expectedAltCopies,
247+
alleleSpecificCopyNumber.totalCopyNumber
256248
</sql>
257249

258250
<sql id="includeAlleleSpecificCopyNumber"> <!-- Join fragment included by the DETAILED projection queries. It now contributes no SQL: the former join is kept only as XML comments, which the mapper parser discards, instead of SQL line comments, which would be sent to the database and would comment out the WHERE clause that follows if whitespace in the statement is ever collapsed onto one line. -->
259-
LEFT JOIN allele_specific_copy_number ON mutation.mutation_event_id = allele_specific_copy_number.mutation_event_id
260-
AND mutation.genetic_profile_id = allele_specific_copy_number.genetic_profile_id
261-
AND mutation.sample_id = allele_specific_copy_number.sample_id
251+
<!-- LEFT JOIN allele_specific_copy_number ON mutation.mutation_event_id = allele_specific_copy_number.mutation_event_id -->
252+
<!-- AND mutation.genetic_profile_id = allele_specific_copy_number.genetic_profile_id -->
253+
<!-- AND mutation.sample_id = allele_specific_copy_number.sample_id -->
262254
</sql>
263255

264256
<resultMap id="genomicDataCountItem" type="org.cbioportal.legacy.model.GenomicDataCountItem">
@@ -279,10 +271,8 @@
279271
SELECT
280272
<include refid="select"/>
281273
<include refid="from"/>
282-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
283274
<if test="projection == 'DETAILED'">
284-
INNER JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
285-
<include refid="includeAlleleSpecificCopyNumber"/>
275+
286276
</if>
287277
<include refid="whereBySampleListId"/>
288278
<if test="sortBy != null and projection != 'ID'">
@@ -299,19 +289,16 @@
299289
<select id="getMetaMutationsBySampleListId" resultType="org.cbioportal.legacy.model.meta.MutationMeta"> <!-- Returns totalCount (number of matching rows) and sampleCount (number of distinct sampleId values) for the rows selected by the whereBySampleListId fragment. The mutation_event join is removed in the new version. -->
300290
SELECT
301291
COUNT(*) AS "totalCount",
302-
COUNT(DISTINCT(mutation.sample_id)) AS "sampleCount"
292+
COUNT(DISTINCT(sampleId)) AS "sampleCount"
303293
<include refid="from"/>
304-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
305294
<include refid="whereBySampleListId"/>
306295
</select>
307296

308297
<select id="getMutationsInMultipleMolecularProfiles" resultType="org.cbioportal.legacy.model.Mutation">
309298
SELECT
310299
<include refid="select"/>
311300
<include refid="from"/>
312-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
313301
<if test="projection == 'DETAILED'">
314-
INNER JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
315302
<include refid="includeAlleleSpecificCopyNumber"/>
316303
</if>
317304
<include refid="whereInMultipleMolecularProfiles"/>
@@ -322,9 +309,7 @@
322309
SELECT
323310
<include refid="select"/>
324311
<include refid="from"/>
325-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
326312
<if test="projection == 'DETAILED'">
327-
INNER JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
328313
<include refid="includeAlleleSpecificCopyNumber"/>
329314
</if>
330315
<include refid="whereInMultipleMolecularProfiles"/>
@@ -334,33 +319,28 @@
334319
<select id="getMetaMutationsInMultipleMolecularProfiles" resultType="org.cbioportal.legacy.model.meta.MutationMeta"> <!-- Returns totalCount (number of matching rows) and sampleCount (number of distinct samples) for the rows selected by the whereInMultipleMolecularProfiles fragment. -->
335320
SELECT
336321
COUNT(*) AS "totalCount",
337-
COUNT(DISTINCT(mutation.sample_id)) AS "sampleCount"
322+
COUNT(DISTINCT(sampleInternalId)) AS "sampleCount" <!-- Counts the internal sample id, as the removed mutation.sample_id line did; this query can span several molecular profiles, and a stable sampleId is presumably not unique across them. -->
338323
<include refid="from"/>
339-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
340324
<include refid="whereInMultipleMolecularProfiles"/>
341325
</select>
342326

343327
<select id="getMetaMutationsBySampleIds" resultType="org.cbioportal.legacy.model.meta.MutationMeta"> <!-- Returns totalCount (number of matching rows) and sampleCount (number of distinct sampleId values) for the rows selected by the single-profile where fragment. -->
344328
SELECT
345329
COUNT(*) AS "totalCount",
346-
COUNT(DISTINCT(mutation.sample_id)) AS "sampleCount"
330+
COUNT(DISTINCT(sampleId)) AS "sampleCount"
347331
<include refid="from"/>
348-
INNER JOIN mutation_event ON mutation_event.mutation_event_id = mutation.mutation_event_id
332+
349333
<include refid="where"/>
350334
</select>
351335

352336
<select id="getSampleCountByEntrezGeneIdsAndSampleIds" resultType="org.cbioportal.legacy.model.MutationCountByGene"> <!-- Per-gene mutation counts for the rows selected by the where fragment: totalCount is the row count and numberOfAlteredCases the number of distinct sampleId values, grouped by entrezGeneId. -->
353337
SELECT
354-
mutation.entrez_gene_id AS "entrezGeneId",
338+
entrezGeneId AS "entrezGeneId",
355339
<!-- TODO: check this-->
356-
ANY_VALUE(gene.hugo_gene_symbol) AS "hugoGeneSymbol",
340+
ANY_VALUE(GENE.hugoGeneSymbol) AS "hugoGeneSymbol",
357341
COUNT(*) AS "totalCount",
358-
COUNT(DISTINCT(mutation.sample_id)) AS "numberOfAlteredCases"
359-
FROM mutation
360-
INNER JOIN mutation_event ON mutation_event.mutation_event_id = mutation.mutation_event_id
361-
INNER JOIN genetic_profile ON mutation.genetic_profile_id = genetic_profile.genetic_profile_id
362-
INNER JOIN sample ON mutation.sample_id = sample.internal_id
363-
INNER JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
342+
COUNT(DISTINCT(sampleId)) AS "numberOfAlteredCases"
343+
FROM mutation_derived
364344
<include refid="where"/>
365-
GROUP BY mutation.entrez_gene_id
345+
GROUP BY entrezGeneId <!-- The mutation table is no longer in the FROM clause, so group by the mutation_derived column that is also selected above. -->
366346
</select>
@@ -371,26 +351,24 @@
371351
#{proteinPosStart} AS "proteinPosStart",
372352
#{proteinPosEnd} AS "proteinPosEnd",
373353
COUNT(*) AS "count"
374-
FROM mutation
375-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
376-
WHERE mutation_event.entrez_gene_id = #{entrezGeneId}
377-
AND mutation_event.protein_pos_start >= #{proteinPosStart}
378-
AND mutation_event.protein_pos_end <![CDATA[ <= ]]> #{proteinPosEnd}
354+
FROM mutation_derived
355+
356+
WHERE entrezGeneId = #{entrezGeneId}
357+
AND proteinPosStart >= #{proteinPosStart}
358+
AND proteinPosEnd <![CDATA[ <= ]]> #{proteinPosEnd}
379359
</select>
380360

381361
<select id="getMutationCountsByType" resultMap="genomicDataCountItem"> <!-- Counts mutations per mutationType for the rows selected by the whereInMultipleMolecularProfiles fragment: label is the type with underscores replaced by spaces, value is the raw type, count is the row count and uniqueCount the number of distinct sampleInternalId values. profileType is echoed back from the statement parameter. -->
382362
SELECT
383363
<!-- TODO: check this-->
384-
ANY_VALUE(gene.hugo_gene_symbol) as hugoGeneSymbol,
364+
ANY_VALUE(GENE.hugoGeneSymbol) as hugoGeneSymbol,
385365
#{profileType} as profileType,
386-
REPLACE(mutation_event.mutation_type, '_', ' ') AS label,
387-
mutation_event.mutation_type AS value,
366+
REPLACE(mutationType, '_', ' ') AS label,
367+
mutationType AS value,
388368
COUNT(*) AS count,
389-
COUNT(DISTINCT(sample.internal_id)) AS uniqueCount
369+
COUNT(DISTINCT(sampleInternalId)) AS uniqueCount
390370
<include refid="from"/>
391-
INNER JOIN mutation_event ON mutation.mutation_event_id = mutation_event.mutation_event_id
392-
INNER JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
393371
<include refid="whereInMultipleMolecularProfiles"/>
394-
GROUP BY mutation_event.mutation_type
372+
GROUP BY mutationType
395373
</select>
396374
</mapper>

0 commit comments

Comments
 (0)