Skip to content

Commit 8d70900

Browse files
authored
Refactor Clickhouse enrichments endpoint to avoid redundant counting of profiling status for genes which are covered by the same panels
1 parent 8b21537 commit 8d70900

File tree

14 files changed

+2610
-52
lines changed

14 files changed

+2610
-52
lines changed
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
package org.cbioportal;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertNotNull;
5+
import static org.junit.jupiter.api.Assertions.assertTrue;
6+
7+
import java.util.Arrays;
8+
9+
import org.cbioportal.legacy.model.AlterationEnrichment;
10+
import org.cbioportal.legacy.model.EnrichmentType;
11+
import org.junit.jupiter.api.Test;
12+
import org.junit.jupiter.api.extension.ExtendWith;
13+
import org.springframework.beans.factory.annotation.Autowired;
14+
import org.springframework.boot.test.web.client.TestRestTemplate;
15+
import org.springframework.boot.test.web.server.LocalServerPort;
16+
import org.springframework.http.HttpEntity;
17+
import org.springframework.http.HttpHeaders;
18+
import org.springframework.http.HttpMethod;
19+
import org.springframework.http.HttpStatus;
20+
import org.springframework.http.MediaType;
21+
import org.springframework.http.ResponseEntity;
22+
import org.springframework.test.context.junit.jupiter.SpringExtension;
23+
24+
@ExtendWith(SpringExtension.class)
25+
class AlterationEnrichmentControllerE2ETest extends AbstractE2ETest {
26+
27+
// HTTP client injected by Spring; used by callEnrichmentEndpoint to POST requests to the application under test.
@Autowired
28+
private TestRestTemplate restTemplate;
29+
30+
// Port of the locally started server; concatenated into the "http://localhost:<port>" endpoint URL.
@LocalServerPort
31+
private int port;
32+
33+
// Shared Jackson mapper: deserializes endpoint responses and parses/edits the JSON request fixtures.
private static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new com.fasterxml.jackson.databind.ObjectMapper();
34+
35+
private AlterationEnrichment[] callEnrichmentEndpoint(String testDataJson) throws Exception {
36+
return callEnrichmentEndpoint(testDataJson, EnrichmentType.SAMPLE);
37+
}
38+
39+
private AlterationEnrichment[] callEnrichmentEndpoint(String testDataJson, EnrichmentType enrichmentType) throws Exception {
40+
HttpHeaders headers = new HttpHeaders();
41+
headers.setContentType(MediaType.APPLICATION_JSON);
42+
HttpEntity<String> requestEntity = new HttpEntity<>(testDataJson, headers);
43+
44+
ResponseEntity<String> response = restTemplate.exchange(
45+
"http://localhost:" + port + "/api/column-store/alteration-enrichments/fetch?enrichmentType=" + enrichmentType,
46+
HttpMethod.POST,
47+
requestEntity,
48+
String.class
49+
);
50+
51+
assertEquals(HttpStatus.OK, response.getStatusCode());
52+
assertNotNull(response.getBody());
53+
return OBJECT_MAPPER.readValue(response.getBody(), AlterationEnrichment[].class);
54+
}
55+
56+
private AlterationEnrichment findGeneEnrichment(AlterationEnrichment[] enrichments, String geneSymbol) {
57+
return Arrays.stream(enrichments)
58+
.filter(enrichment -> geneSymbol.equals(enrichment.getHugoGeneSymbol()))
59+
.findFirst()
60+
.orElse(null);
61+
}
62+
63+
private int getTotalProfiledSamples(AlterationEnrichment enrichment) {
64+
return enrichment.getCounts().stream()
65+
.mapToInt(count -> count.getProfiledCount())
66+
.sum();
67+
}
68+
69+
private int getTotalAlteredSamples(AlterationEnrichment enrichment) {
70+
return enrichment.getCounts().stream()
71+
.mapToInt(count -> count.getAlteredCount())
72+
.sum();
73+
}
74+
75+
private String loadTestData(String filename) throws Exception {
76+
return new String(java.nio.file.Files.readAllBytes(
77+
java.nio.file.Paths.get("src/e2e/java/org/cbioportal/AlterationEnrichmentControllerE2ETest/" + filename)));
78+
}
79+
80+
@Test
81+
void testFetchAlterationEnrichmentsWithDataJson() throws Exception {
82+
// this combination comparison session has two studies, one WES and the other IMPACT
83+
// 104 samples total, 92 of which are belong to WES study. 14 samples should be profiled for only IMPACT genes
84+
// NOTE that of 92, only 91 are profiled
85+
AlterationEnrichment[] enrichments = callEnrichmentEndpoint(loadTestData("all_alterations.json"));
86+
87+
AlterationEnrichment spsb1Enrichment = findGeneEnrichment(enrichments, "SPSB1");
88+
assertNotNull(spsb1Enrichment, "SPSB1 enrichment should be present in response");
89+
assertEquals(91, getTotalProfiledSamples(spsb1Enrichment), "SPSB1 should have 91 total profiled samples across all groups");
90+
91+
AlterationEnrichment tp53Enrichment = findGeneEnrichment(enrichments, "TP53");
92+
assertNotNull(tp53Enrichment, "TP53 enrichment should be present in response");
93+
assertEquals(103, getTotalProfiledSamples(tp53Enrichment), "TP53 should have 103 total profiled samples across all groups because it is in IMPACT");
94+
}
95+
96+
@Test
97+
void testFetchAlterationEnrichmentsWithDataJsonCNAOnly() throws Exception {
98+
// this combination comparison session has two studies, one WES and the other IMPACT
99+
// 104 samples total, 92 of which are belong to WES study. 14 samples should be profiled for only IMPACT genes
100+
// NOTE that of 92, only 91 are profiled
101+
102+
String testDataJson = loadTestData("all_alterations.json");
103+
104+
// Parse JSON and filter out mutation and structural variant profiles from molecularProfileCaseIdentifiers
105+
com.fasterxml.jackson.databind.JsonNode rootNode = OBJECT_MAPPER.readTree(testDataJson);
106+
com.fasterxml.jackson.databind.node.ArrayNode groupsArray =
107+
(com.fasterxml.jackson.databind.node.ArrayNode) rootNode.get("molecularProfileCasesGroupFilter");
108+
109+
// Filter each group's molecularProfileCaseIdentifiers to exclude mutation and structural variant profiles
110+
for (com.fasterxml.jackson.databind.JsonNode group : groupsArray) {
111+
com.fasterxml.jackson.databind.node.ArrayNode identifiersArray =
112+
(com.fasterxml.jackson.databind.node.ArrayNode) group.get("molecularProfileCaseIdentifiers");
113+
114+
// Create a new array with only CNA profiles (excluding mutation and structural variant profiles)
115+
com.fasterxml.jackson.databind.node.ArrayNode filteredArray = OBJECT_MAPPER.createArrayNode();
116+
for (com.fasterxml.jackson.databind.JsonNode identifier : identifiersArray) {
117+
String profileId = identifier.get("molecularProfileId").asText();
118+
if (!profileId.endsWith("_mutations") && !profileId.endsWith("_structural_variants")) {
119+
filteredArray.add(identifier);
120+
}
121+
}
122+
123+
// Replace the original array with the filtered one
124+
((com.fasterxml.jackson.databind.node.ObjectNode) group).set("molecularProfileCaseIdentifiers", filteredArray);
125+
}
126+
127+
AlterationEnrichment[] enrichments = callEnrichmentEndpoint(OBJECT_MAPPER.writeValueAsString(rootNode));
128+
129+
AlterationEnrichment spsb1Enrichment = findGeneEnrichment(enrichments, "SPSB1");
130+
assertNotNull(spsb1Enrichment, "SPSB1 enrichment should be present in response");
131+
assertEquals(89, getTotalProfiledSamples(spsb1Enrichment), "SPSB1 should have 91 total profiled samples across all groups");
132+
133+
AlterationEnrichment tp53Enrichment = findGeneEnrichment(enrichments, "TP53");
134+
assertNotNull(tp53Enrichment, "TP53 enrichment should be present in response");
135+
assertEquals(89, getTotalProfiledSamples(tp53Enrichment), "TP53 should have 104 total profiled samples across all groups because it is in IMPACT");
136+
}
137+
138+
139+
@Test
140+
void testFetchAlterationEnrichmentsWithDataJsonNoMissense() throws Exception {
141+
// this combination comparison session has two studies, one WES and the other IMPACT
142+
// 104 samples total, 92 of which are belong to WES study. 14 samples should be profiled for only IMPACT genes
143+
// NOTE that of 92, only 91 are profiled
144+
145+
String testDataJson = loadTestData("all_alterations.json");
146+
147+
// Parse JSON and disable missense_mutation
148+
com.fasterxml.jackson.databind.JsonNode rootNode = OBJECT_MAPPER.readTree(testDataJson);
149+
com.fasterxml.jackson.databind.node.ObjectNode mutationEventTypes =
150+
(com.fasterxml.jackson.databind.node.ObjectNode) rootNode.get("alterationEventTypes").get("mutationEventTypes");
151+
mutationEventTypes.put("missense_mutation", false);
152+
153+
AlterationEnrichment[] enrichments = callEnrichmentEndpoint(OBJECT_MAPPER.writeValueAsString(rootNode));
154+
155+
AlterationEnrichment ANP32EEnrichment = findGeneEnrichment(enrichments, "ANP32E");
156+
assertNotNull(ANP32EEnrichment, "ANP32E enrichment should be present in response");
157+
assertEquals(3, getTotalAlteredSamples(ANP32EEnrichment), "ANP32E should have 3 altered samples with missense_mutation disabled");
158+
159+
// Call with raw testDataJson to compare
160+
AlterationEnrichment[] rawEnrichments = callEnrichmentEndpoint(testDataJson);
161+
AlterationEnrichment rawANP32EEnrichment = findGeneEnrichment(rawEnrichments, "ANP32E");
162+
assertNotNull(rawANP32EEnrichment, "ANP32E enrichment should be present in raw response");
163+
assertEquals(4, getTotalAlteredSamples(rawANP32EEnrichment), "ANP32E should have 4 altered samples with all mutations enabled");
164+
165+
}
166+
167+
168+
169+
170+
@Test
171+
void testFetchAlterationEnrichmentsWithMultiPanel() throws Exception {
172+
// this comparison session is of 33 samples (from a single study) which are covered by 2 different panels
173+
AlterationEnrichment[] enrichments = callEnrichmentEndpoint(loadTestData("multi_panel.json"));
174+
175+
assertTrue(Arrays.stream(enrichments).allMatch(enrichment -> enrichment.getCounts().size() == 4),
176+
"All genes should have exactly 4 groups");
177+
assertTrue(Arrays.stream(enrichments).allMatch(enrichment ->
178+
enrichment.getCounts().stream().anyMatch(count -> count.getAlteredCount() > 0)),
179+
"Each gene should have at least one group with an alteration");
180+
181+
AlterationEnrichment tp53i13Enrichment = findGeneEnrichment(enrichments, "TP53I13");
182+
assertNotNull(tp53i13Enrichment, "TP53I13 enrichment should be present in response");
183+
// of 33 samples, 26 are covered by WES panel for mutation and so only those will be profiled for
184+
// genes which are not covered by panel
185+
assertEquals(26, getTotalProfiledSamples(tp53i13Enrichment), "TP53I13 should have 26 total profiled samples across all groups");
186+
187+
AlterationEnrichment tp53Enrichment = findGeneEnrichment(enrichments, "TP53");
188+
assertNotNull(tp53Enrichment, "TP53 enrichment should be present in response");
189+
assertEquals(33, getTotalProfiledSamples(tp53Enrichment), "TP53 should have 33 total profiled samples across all groups because it is in IMPACT");
190+
}
191+
192+
193+
@Test
194+
void testFetchAlterationFilteringByAlterationType() throws Exception {
195+
AlterationEnrichment[] enrichments = callEnrichmentEndpoint(loadTestData("multi_panel.json"));
196+
197+
AlterationEnrichment tp53Enrichment = findGeneEnrichment(enrichments, "TP53");
198+
assertNotNull(tp53Enrichment, "TP53 enrichment should be present in response");
199+
assertEquals(24, getTotalAlteredSamples(tp53Enrichment), "TP53 should have 24 total altered samples across all groups");
200+
}
201+
202+
@Test
203+
void testFetchAlterationEnrichmentsExcludingMissenseMutations() throws Exception {
204+
String testDataJson = loadTestData("multi_panel.json");
205+
206+
// Get baseline results without filter
207+
AlterationEnrichment[] originalEnrichments = callEnrichmentEndpoint(testDataJson);
208+
AlterationEnrichment originalTp53Enrichment = findGeneEnrichment(originalEnrichments, "TP53");
209+
assertNotNull(originalTp53Enrichment, "TP53 enrichment should be present in original response");
210+
int originalTotalAlteredSamples = getTotalAlteredSamples(originalTp53Enrichment);
211+
212+
// Modify JSON to exclude missense mutations
213+
com.fasterxml.jackson.databind.JsonNode rootNode = OBJECT_MAPPER.readTree(testDataJson);
214+
com.fasterxml.jackson.databind.node.ObjectNode mutationEventTypes =
215+
(com.fasterxml.jackson.databind.node.ObjectNode) rootNode.get("alterationEventTypes").get("mutationEventTypes");
216+
mutationEventTypes.put("missense", false);
217+
mutationEventTypes.put("missense_mutation", false);
218+
mutationEventTypes.put("missense_variant", false);
219+
220+
// Get filtered results
221+
AlterationEnrichment[] filteredEnrichments = callEnrichmentEndpoint(OBJECT_MAPPER.writeValueAsString(rootNode));
222+
AlterationEnrichment filteredTp53Enrichment = findGeneEnrichment(filteredEnrichments, "TP53");
223+
assertNotNull(filteredTp53Enrichment, "TP53 enrichment should be present in filtered response");
224+
int filteredTotalAlteredSamples = getTotalAlteredSamples(filteredTp53Enrichment);
225+
226+
// Verify filtering worked
227+
assertTrue(filteredTotalAlteredSamples < originalTotalAlteredSamples,
228+
"TP53 should have fewer altered samples when missense mutations are excluded. Original: " +
229+
originalTotalAlteredSamples + ", Filtered: " + filteredTotalAlteredSamples);
230+
assertEquals(12, filteredTotalAlteredSamples,
231+
"TP53 should have 12 altered samples when missense mutations are excluded");
232+
assertTrue(filteredEnrichments.length < originalEnrichments.length,
233+
"Filtered response should have fewer genes than original. Original: " + originalEnrichments.length +
234+
", Filtered: " + filteredEnrichments.length + " (genes with only missense mutations should be excluded)");
235+
}
236+
237+
@Test
238+
void testFetchAlterationEnrichmentsPatientVSample() throws Exception {
239+
240+
// from https://www.cbioportal.org/comparison/alterations?comparisonId=6184fd03f8f71021ce56e3ff
241+
242+
String testDataJsonSample = loadTestData("sample.json");
243+
String testDataJsonPatient = loadTestData("patient.json");
244+
245+
// Get F8 enrichment with SAMPLE enrichment type
246+
AlterationEnrichment[] sampleEnrichments = callEnrichmentEndpoint(testDataJsonSample, EnrichmentType.SAMPLE);
247+
AlterationEnrichment f8SampleEnrichment = findGeneEnrichment(sampleEnrichments, "F8");
248+
249+
// Get F8 enrichment with PATIENT enrichment type
250+
AlterationEnrichment[] patientEnrichments = callEnrichmentEndpoint(testDataJsonPatient, EnrichmentType.PATIENT);
251+
AlterationEnrichment f8PatientEnrichment = findGeneEnrichment(patientEnrichments, "F8");
252+
253+
// Both should be present
254+
assertNotNull(f8SampleEnrichment, "F8 enrichment should be present in SAMPLE response");
255+
assertNotNull(f8PatientEnrichment, "F8 enrichment should be present in PATIENT response");
256+
257+
// Get counts for comparison
258+
int sampleProfiledCount = getTotalProfiledSamples(f8SampleEnrichment);
259+
int patientProfiledCount = getTotalProfiledSamples(f8PatientEnrichment);
260+
int sampleAlteredCount = getTotalAlteredSamples(f8SampleEnrichment);
261+
int patientAlteredCount = getTotalAlteredSamples(f8PatientEnrichment);
262+
263+
// Verify that SAMPLE and PATIENT enrichment types produce different results
264+
assertEquals(sampleAlteredCount, 31);
265+
assertEquals(patientAlteredCount, 14);
266+
267+
assertEquals(sampleProfiledCount,447);
268+
269+
assertEquals(patientProfiledCount,100);
270+
271+
272+
}
273+
274+
275+
}

0 commit comments

Comments
 (0)