Skip to content

Commit e70d600

Browse files
authored
Migrate clinical-data endpoint to clean architecture with ClickHouse support (#11425)
* Implement clinical-data in clean arch with ClickHouse support
* Add integration tests
* Fix ResultMap
* Rectify naming
* Address comments
1 parent 8c0620d commit e70d600

File tree

33 files changed

+1323
-123
lines changed

33 files changed

+1323
-123
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,252 @@
1+
package org.cbioportal.infrastructure.repository.clickhouse.clinical_data;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertFalse;
5+
import static org.junit.jupiter.api.Assertions.assertNotNull;
6+
import static org.junit.jupiter.api.Assertions.assertNull;
7+
import static org.junit.jupiter.api.Assertions.assertTrue;
8+
9+
import java.util.Arrays;
10+
import java.util.List;
11+
import org.cbioportal.AbstractClickhouseIntegrationTest;
12+
import org.cbioportal.domain.clinical_data.ClinicalData;
13+
import org.cbioportal.domain.clinical_data.ClinicalDataType;
14+
import org.junit.jupiter.api.BeforeEach;
15+
import org.junit.jupiter.api.Test;
16+
import org.junit.jupiter.api.extension.ExtendWith;
17+
import org.springframework.beans.factory.annotation.Autowired;
18+
import org.springframework.test.context.junit.jupiter.SpringExtension;
19+
20+
@ExtendWith(SpringExtension.class)
21+
class ClickHouseClinicalDataRepositoryIntegrationTest extends AbstractClickhouseIntegrationTest {
22+
23+
private ClickhouseClinicalDataRepository repository;
24+
25+
@Autowired
26+
private ClickhouseClinicalDataMapper mapper;
27+
28+
// Test data based on actual cBioPortal public dataset
29+
private static final List<String> TEST_SAMPLE_UNIQUE_IDS = Arrays.asList(
30+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J1-01",
31+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J2-01",
32+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J3-01"
33+
);
34+
35+
private static final List<String> TEST_PATIENT_UNIQUE_IDS = Arrays.asList(
36+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J1",
37+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J2",
38+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J3"
39+
);
40+
41+
private static final List<String> COMMON_SAMPLE_ATTRIBUTES = Arrays.asList(
42+
"SAMPLE_TYPE",
43+
"ANEUPLOIDY_SCORE"
44+
);
45+
46+
private static final List<String> COMMON_PATIENT_ATTRIBUTES = Arrays.asList(
47+
"AGE",
48+
"SEX",
49+
"PRIOR_DX"
50+
);
51+
52+
@BeforeEach
53+
void setup() {
54+
repository = new ClickhouseClinicalDataRepository(mapper);
55+
}
56+
57+
@Test
58+
void testFetchClinicalDataId_WithSampleData() {
59+
List<ClinicalData> result = repository.fetchClinicalDataId(
60+
TEST_SAMPLE_UNIQUE_IDS,
61+
COMMON_SAMPLE_ATTRIBUTES,
62+
ClinicalDataType.SAMPLE
63+
);
64+
65+
assertFalse(result.isEmpty());
66+
67+
result.forEach(clinicalData -> {
68+
assertNotNull(clinicalData.internalId());
69+
assertNotNull(clinicalData.sampleId());
70+
assertNotNull(clinicalData.studyId());
71+
assertNotNull(clinicalData.attrId());
72+
assertNull(clinicalData.attrValue());
73+
assertNull(clinicalData.clinicalAttribute());
74+
});
75+
}
76+
77+
@Test
78+
void testFetchClinicalDataSummary_WithPatientData() {
79+
List<ClinicalData> result = repository.fetchClinicalDataSummary(
80+
TEST_PATIENT_UNIQUE_IDS,
81+
COMMON_PATIENT_ATTRIBUTES,
82+
ClinicalDataType.PATIENT
83+
);
84+
85+
assertFalse(result.isEmpty());
86+
87+
result.forEach(clinicalData -> {
88+
assertNotNull(clinicalData.internalId());
89+
assertNotNull(clinicalData.patientId());
90+
assertNotNull(clinicalData.studyId());
91+
assertNotNull(clinicalData.attrId());
92+
assertNotNull(clinicalData.attrValue());
93+
assertNull(clinicalData.clinicalAttribute());
94+
});
95+
}
96+
97+
@Test
98+
void testFetchClinicalDataDetailed_WithSpecificValues() {
99+
// When - get detailed data for specific samples we know the values for
100+
List<ClinicalData> result = repository.fetchClinicalDataDetailed(
101+
Arrays.asList(
102+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J1-01",
103+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J2-01"
104+
),
105+
Arrays.asList("ANEUPLOIDY_SCORE", "SAMPLE_TYPE"),
106+
ClinicalDataType.SAMPLE
107+
);
108+
109+
// Then - verify we get the expected values from real data
110+
assertFalse(result.isEmpty(), "Should return data for valid sample IDs");
111+
112+
// Verify specific known values
113+
boolean foundJ1Aneuploidy = false;
114+
boolean foundJ2Aneuploidy = false;
115+
boolean foundSampleType = false;
116+
117+
for (ClinicalData data : result) {
118+
if ("TCGA-OR-A5J1-01".equals(data.sampleId()) && "ANEUPLOIDY_SCORE".equals(data.attrId())) {
119+
assertEquals("2", data.attrValue(), "TCGA-OR-A5J1-01 should have ANEUPLOIDY_SCORE = 2");
120+
foundJ1Aneuploidy = true;
121+
}
122+
if ("TCGA-OR-A5J2-01".equals(data.sampleId()) && "ANEUPLOIDY_SCORE".equals(data.attrId())) {
123+
assertEquals("10", data.attrValue(), "TCGA-OR-A5J2-01 should have ANEUPLOIDY_SCORE = 10");
124+
foundJ2Aneuploidy = true;
125+
}
126+
if ("SAMPLE_TYPE".equals(data.attrId())) {
127+
assertEquals("Primary", data.attrValue(), "All samples should have SAMPLE_TYPE = Primary");
128+
foundSampleType = true;
129+
}
130+
131+
// Verify DETAILED projection includes clinical attribute metadata
132+
assertNotNull(data.clinicalAttribute(), "DETAILED projection should include clinical attribute");
133+
assertEquals(data.attrId(), data.clinicalAttribute().attrId(), "Attribute IDs should match");
134+
}
135+
136+
assertTrue(foundJ1Aneuploidy, "Should find ANEUPLOIDY_SCORE for TCGA-OR-A5J1-01");
137+
assertTrue(foundJ2Aneuploidy, "Should find ANEUPLOIDY_SCORE for TCGA-OR-A5J2-01");
138+
assertTrue(foundSampleType, "Should find SAMPLE_TYPE data");
139+
}
140+
141+
@Test
142+
void testFetchClinicalDataMeta_WithSampleData() {
143+
Integer count = repository.fetchClinicalDataMeta(
144+
TEST_SAMPLE_UNIQUE_IDS,
145+
COMMON_SAMPLE_ATTRIBUTES,
146+
ClinicalDataType.SAMPLE
147+
);
148+
149+
assertNotNull(count);
150+
assertEquals(54, count.intValue());
151+
152+
List<ClinicalData> actualData = repository.fetchClinicalDataSummary(
153+
TEST_SAMPLE_UNIQUE_IDS,
154+
COMMON_SAMPLE_ATTRIBUTES,
155+
ClinicalDataType.SAMPLE
156+
);
157+
assertEquals(actualData.size(), count.intValue());
158+
}
159+
160+
@Test
161+
void testFetchClinicalDataId_WithEmptyInput() {
162+
// When
163+
List<ClinicalData> result = repository.fetchClinicalDataId(
164+
List.of(),
165+
COMMON_SAMPLE_ATTRIBUTES,
166+
ClinicalDataType.SAMPLE
167+
);
168+
169+
// Then
170+
assertTrue(result.isEmpty(), "Should return empty list for empty input");
171+
}
172+
173+
@Test
174+
void testFetchClinicalDataMeta_WithEmptyInput() {
175+
// When
176+
Integer count = repository.fetchClinicalDataMeta(
177+
List.of(),
178+
COMMON_SAMPLE_ATTRIBUTES,
179+
ClinicalDataType.SAMPLE
180+
);
181+
182+
// Then
183+
assertEquals(0, count.intValue(), "Should return 0 count for empty input");
184+
}
185+
186+
@Test
187+
void testProjectionConsistency_SameEntitiesReturned() {
188+
// When - get data with different projections using subset for focused test
189+
List<String> testIds = Arrays.asList(
190+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J1-01",
191+
"acc_tcga_pan_can_atlas_2018_TCGA-OR-A5J2-01"
192+
);
193+
List<String> testAttrs = List.of("SAMPLE_TYPE");
194+
195+
List<ClinicalData> idResults = repository.fetchClinicalDataId(
196+
testIds, testAttrs, ClinicalDataType.SAMPLE
197+
);
198+
199+
List<ClinicalData> summaryResults = repository.fetchClinicalDataSummary(
200+
testIds, testAttrs, ClinicalDataType.SAMPLE
201+
);
202+
203+
// Then - should return same entities with different levels of detail
204+
assertEquals(2, idResults.size(), "Should return 2 records for 2 samples with SAMPLE_TYPE");
205+
assertEquals(idResults.size(), summaryResults.size(),
206+
"Different projections should return same number of entities");
207+
208+
// Verify entities match (same internal IDs and attribute IDs)
209+
for (int i = 0; i < idResults.size(); i++) {
210+
ClinicalData idData = idResults.get(i);
211+
ClinicalData summaryData = summaryResults.get(i);
212+
213+
assertEquals(idData.internalId(), summaryData.internalId(),
214+
"Internal ID should match across projections");
215+
assertEquals(idData.attrId(), summaryData.attrId(),
216+
"Attribute ID should match across projections");
217+
assertEquals(idData.sampleId(), summaryData.sampleId(),
218+
"Sample ID should match across projections");
219+
220+
// Verify projection differences
221+
assertNull(idData.attrValue(), "ID projection should not have attribute value");
222+
assertEquals("Primary", summaryData.attrValue(), "SUMMARY projection should have attribute value");
223+
}
224+
}
225+
226+
@Test
227+
void testClinicalDataType_PatientVsSample() {
228+
// When
229+
List<ClinicalData> sampleData = repository.fetchClinicalDataSummary(
230+
TEST_SAMPLE_UNIQUE_IDS,
231+
COMMON_SAMPLE_ATTRIBUTES,
232+
ClinicalDataType.SAMPLE
233+
);
234+
235+
List<ClinicalData> patientData = repository.fetchClinicalDataSummary(
236+
TEST_PATIENT_UNIQUE_IDS,
237+
COMMON_PATIENT_ATTRIBUTES,
238+
ClinicalDataType.PATIENT
239+
);
240+
241+
// Then - verify correct data type returned
242+
sampleData.forEach(data -> {
243+
assertNotNull(data.sampleId(), "Sample data should have sample ID");
244+
assertNotNull(data.patientId(), "Sample data should also have patient ID");
245+
});
246+
247+
patientData.forEach(data -> {
248+
assertNotNull(data.patientId(), "Patient data should have patient ID");
249+
// Sample ID may be null or empty for patient-level data
250+
});
251+
}
252+
}
Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
package org.cbioportal.application.rest.mapper;
2+
3+
import java.util.List;
4+
import org.cbioportal.application.rest.response.ClinicalAttributeDTO;
5+
import org.cbioportal.domain.clinical_attributes.ClinicalAttribute;
6+
import org.mapstruct.Mapper;
7+
import org.mapstruct.Mapping;
8+
import org.mapstruct.factory.Mappers;
9+
10+
@Mapper
11+
public interface ClinicalAttributeMapper {
12+
ClinicalAttributeMapper INSTANCE = Mappers.getMapper(ClinicalAttributeMapper.class);
13+
14+
@Mapping(target = "clinicalAttributeId", source = "attrId")
15+
@Mapping(target = "studyId", source = "cancerStudyIdentifier")
16+
ClinicalAttributeDTO toClinicalAttributeDTO(ClinicalAttribute clinicalAttribute);
17+
18+
List<ClinicalAttributeDTO> toClinicalAttributeDTOs(List<ClinicalAttribute> clinicalAttributes);
19+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package org.cbioportal.application.rest.mapper;
2+
3+
import java.util.List;
4+
import org.cbioportal.application.rest.response.ClinicalDataDTO;
5+
import org.cbioportal.domain.clinical_data.ClinicalData;
6+
import org.cbioportal.legacy.utils.Encoder;
7+
import org.mapstruct.Mapper;
8+
import org.mapstruct.Mapping;
9+
import org.mapstruct.factory.Mappers;
10+
11+
@Mapper(imports = Encoder.class, uses = ClinicalAttributeMapper.class)
12+
public interface ClinicalDataMapper {
13+
ClinicalDataMapper INSTANCE = Mappers.getMapper(ClinicalDataMapper.class);
14+
15+
@Mapping(
16+
target = "uniqueSampleKey",
17+
expression =
18+
"java( Encoder.calculateBase64(clinicalData.sampleId()," + "clinicalData.studyId()) )")
19+
@Mapping(
20+
target = "uniquePatientKey",
21+
expression =
22+
"java( Encoder.calculateBase64(clinicalData.patientId(), " + "clinicalData.studyId()) )")
23+
@Mapping(target = "patientAttribute", source = "clinicalAttribute.patientAttribute")
24+
@Mapping(target = "clinicalAttributeId", source = "attrId")
25+
@Mapping(target = "value", source = "attrValue")
26+
ClinicalDataDTO toClinicalDataDTO(ClinicalData clinicalData);
27+
28+
List<ClinicalDataDTO> toDTOs(List<ClinicalData> clinicalDataList);
29+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package org.cbioportal.application.rest.response;
2+
3+
/**
 * REST response record describing a clinical attribute.
 *
 * @param displayName display name of the attribute
 * @param description description of the attribute
 * @param datatype datatype of the attribute, as a string
 * @param patientAttribute patient-attribute flag (presumably true for patient-level attributes —
 *     confirm against the domain model)
 * @param priority priority of the attribute, as a string
 * @param clinicalAttributeId identifier of the attribute
 * @param studyId identifier of the study the attribute belongs to
 */
public record ClinicalAttributeDTO(
    String displayName,
    String description,
    String datatype,
    Boolean patientAttribute,
    String priority,
    String clinicalAttributeId,
    String studyId
) {
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package org.cbioportal.application.rest.response;
2+
3+
public record ClinicalDataDTO(
4+
String uniqueSampleKey,
5+
String uniquePatientKey,
6+
String sampleId,
7+
String patientId,
8+
String studyId,
9+
ClinicalAttributeDTO clinicalAttribute,
10+
Boolean patientAttribute,
11+
String clinicalAttributeId,
12+
String value) {}

0 commit comments

Comments
 (0)