Skip to content

Commit 56ac7ca

Browse files
committed
optimize clinical data derived table related queries
1 parent c72741f commit 56ac7ca

File tree

2 files changed

+46
-16
lines changed

2 files changed

+46
-16
lines changed

src/main/resources/db-scripts/clickhouse/clickhouse.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,7 @@ CREATE TABLE IF NOT EXISTS clinical_data_derived
316316
type LowCardinality(String)
317317
)
318318
ENGINE=MergeTree
319-
ORDER BY (type, attribute_name, sample_unique_id);
319+
ORDER BY (cancer_study_identifier, type, attribute_name, sample_unique_id);
320320

321321
-- Insert sample attribute data
322322
INSERT INTO TABLE clinical_data_derived

src/main/resources/mappers/clickhouse/clinical_data/ClickhouseClinicalDataMapper.xml

Lines changed: 45 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,22 @@
55

66
<select id="getSampleClinicalDataFromStudyViewFilter" resultMap="clinicalDataSummaryResultMap">
77
SELECT
8-
internal_id as internalId,
9-
replaceOne(sample_unique_id, concat(cancer_study_identifier, '_'), '') as sampleId,
10-
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
11-
attribute_name as attrId,
12-
attribute_value as attrValue,
13-
cancer_study_identifier as studyId
8+
internal_id as internalId,
9+
replaceOne(sample_unique_id, concat(cancer_study_identifier, '_'), '') as sampleId,
10+
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
11+
attribute_name as attrId,
12+
attribute_value as attrValue,
13+
cancer_study_identifier as studyId
1414
FROM clinical_data_derived
1515
<where>
1616
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.applyStudyViewFilterUsingSampleId"/>
1717
</where>
18+
<if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
19+
AND cancer_study_identifier IN
20+
<foreach item="studyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
21+
#{studyId}
22+
</foreach>
23+
</if>
1824
<if test="attributeIds != null and !attributeIds.isEmpty()">
1925
AND attribute_name IN
2026
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
@@ -26,16 +32,22 @@
2632

2733
<select id="getPatientClinicalDataFromStudyViewFilter" resultMap="clinicalDataSummaryResultMap">
2834
SELECT
29-
internal_id as internalId,
30-
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
31-
NULL as sampleId,
32-
attribute_name as attrId,
33-
attribute_value as attrValue,
34-
cancer_study_identifier as studyId
35+
internal_id as internalId,
36+
replaceOne(patient_unique_id, concat(cancer_study_identifier, '_'), '') as patientId,
37+
NULL as sampleId,
38+
attribute_name as attrId,
39+
attribute_value as attrValue,
40+
cancer_study_identifier as studyId
3541
FROM clinical_data_derived
3642
<where>
3743
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.applyStudyViewFilterUsingPatientId"/>
3844
</where>
45+
<if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
46+
AND cancer_study_identifier IN
47+
<foreach item="studyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
48+
#{studyId}
49+
</foreach>
50+
</if>
3951
<if test="attributeIds != null and !attributeIds.isEmpty()">
4052
AND attribute_name IN
4153
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
@@ -107,6 +119,12 @@
107119

108120
<sql id="fetchClinicalDataWhereClause">
109121
WHERE cd.type = #{clinicalDataType}
122+
<if test="studyIds != null and !studyIds.isEmpty()">
123+
AND cd.cancer_study_identifier IN
124+
<foreach item="studyId" collection="studyIds" open="(" separator="," close=")">
125+
#{studyId}
126+
</foreach>
127+
</if>
110128
<if test="uniqueIds != null and !uniqueIds.isEmpty()">
111129
AND
112130
<choose>
@@ -134,6 +152,12 @@
134152
SELECT COUNT(*)
135153
FROM clinical_data_derived cd
136154
WHERE cd.type = #{clinicalDataType}
155+
<if test="studyIds != null and !studyIds.isEmpty()">
156+
AND cd.cancer_study_identifier IN
157+
<foreach item="studyId" collection="studyIds" open="(" separator="," close=")">
158+
#{studyId}
159+
</foreach>
160+
</if>
137161
<if test="uniqueIds != null and !uniqueIds.isEmpty()">
138162
AND
139163
<choose>
@@ -191,15 +215,21 @@
191215
(
192216
WITH clinical_data_query AS (
193217
SELECT
194-
attribute_name AS attributeId,
195-
attribute_value AS value,
196-
cast(count(*) AS INTEGER) as count
218+
attribute_name AS attributeId,
219+
attribute_value AS value,
220+
cast(count(*) AS INTEGER) as count
197221
FROM clinical_data_derived cdd
198222
<if test="'${isConflicting}' == 'true'">
199223
<!-- JOIN patient data with sample table to map patient-level attributes to sample-level counts -->
200224
LEFT JOIN sample_derived sd ON cdd.patient_unique_id = sd.patient_unique_id
201225
</if>
202226
<where>
227+
<if test="studyViewFilterContext.customDataFilterCancerStudies != null and !studyViewFilterContext.customDataFilterCancerStudies.isEmpty()">
228+
AND cdd.cancer_study_identifier IN
229+
<foreach item="studyId" collection="studyViewFilterContext.customDataFilterCancerStudies" open="(" separator="," close=")">
230+
#{studyId}
231+
</foreach>
232+
</if>
203233
AND <!-- Table creation in clickhouse.sql has ensured no NA values but extra caution is always appreciated -->
204234
<include refid="org.cbioportal.infrastructure.repository.clickhouse.studyview.ClickhouseStudyViewFilterMapper.normalizeAttributeValue">
205235
<property name="attribute_value" value="value"/>

0 commit comments

Comments
 (0)