Skip to content

Commit c4752a7

Browse files
authored
Fix the keyword search for studies and genes (sort and matching pattern) (#11812)
1 parent bc8ab49 commit c4752a7

File tree

5 files changed

+165
-13
lines changed

5 files changed

+165
-13
lines changed

src/integration/java/org/cbioportal/infrastructure/repository/clickhouse/cancerstudy/ClickhouseCancerStudyRepositoryIntegrationTest.java

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,107 @@ void testGetCancerStudiesMetadata() {
3030
repository.getCancerStudiesMetadata(new SortAndSearchCriteria("", "", "", null, null));
3131
assertEquals(TOTAL_STUDIES, studies.size());
3232
}
33+
34+
@Test
35+
void testGetCancerStudiesMetadataSummary() {
36+
var studies =
37+
repository.getCancerStudiesMetadataSummary(
38+
new SortAndSearchCriteria("", "", "", null, null));
39+
assertEquals(TOTAL_STUDIES, studies.size());
40+
}
41+
42+
@Test
43+
void testGetCancerStudiesMetadataSummaryWithKeywordSearch() {
44+
// Search for studies containing "tcga" in name, identifier, or cancer type
45+
var studies =
46+
repository.getCancerStudiesMetadataSummary(
47+
new SortAndSearchCriteria("tcga", "", "", null, null));
48+
49+
// Should return studies matching "tcga" (case-insensitive substring match)
50+
// All returned studies should contain "tcga" somewhere in their metadata
51+
studies.forEach(
52+
study -> {
53+
String searchable =
54+
(study.name()
55+
+ " "
56+
+ study.cancerStudyIdentifier()
57+
+ " "
58+
+ study.typeOfCancer().name()
59+
+ " "
60+
+ study.typeOfCancer().id())
61+
.toLowerCase();
62+
assert searchable.contains("tcga")
63+
: "Study "
64+
+ study.cancerStudyIdentifier()
65+
+ " does not contain 'tcga' in searchable fields";
66+
});
67+
68+
// Should have fewer results than total studies
69+
assert studies.size() < TOTAL_STUDIES : "Keyword search should filter results";
70+
assert !studies.isEmpty() : "Should find at least some TCGA studies";
71+
}
72+
73+
@Test
74+
void testGetCancerStudiesMetadataSummaryWithKeywordSearchCaseInsensitive() {
75+
// Test case-insensitive search
76+
var studiesLower =
77+
repository.getCancerStudiesMetadataSummary(
78+
new SortAndSearchCriteria("breast", "", "", null, null));
79+
var studiesUpper =
80+
repository.getCancerStudiesMetadataSummary(
81+
new SortAndSearchCriteria("Breast", "", "", null, null));
82+
83+
// Both searches should return the same results (case-insensitive)
84+
assertEquals(
85+
studiesLower.size(),
86+
studiesUpper.size(),
87+
"Case-insensitive search should return same number of results");
88+
89+
// Verify all results contain "breast" (case-insensitive)
90+
studiesLower.forEach(
91+
study -> {
92+
String searchable =
93+
(study.name()
94+
+ " "
95+
+ study.cancerStudyIdentifier()
96+
+ " "
97+
+ study.typeOfCancer().name()
98+
+ " "
99+
+ study.typeOfCancer().id())
100+
.toLowerCase();
101+
assert searchable.contains("breast")
102+
: "Study "
103+
+ study.cancerStudyIdentifier()
104+
+ " does not contain 'breast' in searchable fields";
105+
});
106+
}
107+
108+
@Test
109+
void testGetCancerStudiesMetadataSummaryWithKeywordSearchSubstringMatch() {
110+
// Test that substring matching works (not just prefix)
111+
var studies =
112+
repository.getCancerStudiesMetadataSummary(
113+
new SortAndSearchCriteria("carcinoma", "", "", null, null));
114+
115+
// Should find studies with "carcinoma" anywhere in the name or cancer type
116+
assert !studies.isEmpty() : "Should find studies with 'carcinoma' in their metadata";
117+
118+
// Verify all results contain "carcinoma"
119+
studies.forEach(
120+
study -> {
121+
String searchable =
122+
(study.name()
123+
+ " "
124+
+ study.cancerStudyIdentifier()
125+
+ " "
126+
+ study.typeOfCancer().name()
127+
+ " "
128+
+ study.typeOfCancer().id())
129+
.toLowerCase();
130+
assert searchable.contains("carcinoma")
131+
: "Study "
132+
+ study.cancerStudyIdentifier()
133+
+ " does not contain 'carcinoma' in searchable fields";
134+
});
135+
}
33136
}

src/main/java/org/cbioportal/legacy/service/impl/GeneServiceImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ private List<Gene> filterGenesWithMultipleEntrezIds(List<Gene> geneList) {
189189
groupedGenes ->
190190
groupedGenes.size() == 1) // filter out genes having duplicate hugoGeneSymbol
191191
.map(groupedGenes -> groupedGenes.get(0))
192+
.sorted((g1, g2) -> g1.getHugoGeneSymbol().compareTo(g2.getHugoGeneSymbol()))
192193
.collect(toList());
193194
}
194195
}

src/main/resources/mappers/clickhouse/cancerstudy/CancerStudyMapper.xml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -154,21 +154,21 @@
154154
</if>
155155
<if test="sortAndSearchCriteria.isSearchable()">
156156
<foreach item="item" collection="sortAndSearchCriteria.searchTerm.split(' ')" open="(" separator=") AND (" close=")">
157-
cs.name like CONCAT('%', #{item}, '%') OR
158-
cs.cancer_study_identifier like CONCAT('%', #{item}, '%') OR
159-
type_of_cancer.name like CONCAT('%', #{item}, '%') OR
160-
type_of_cancer.type_of_cancer_id like CONCAT('%', #{item}, '%')
157+
cs.name ilike CONCAT('%', #{item}, '%') OR
158+
cs.cancer_study_identifier ilike CONCAT('%', #{item}, '%') OR
159+
type_of_cancer.name ilike CONCAT('%', #{item}, '%') OR
160+
type_of_cancer.type_of_cancer_id ilike CONCAT('%', #{item}, '%')
161161
</foreach>
162162
</if>
163163
</where>
164-
<if test="sortAndSearchCriteria.isSortable()">
165-
ORDER BY ${sortAndSearchCriteria.sortField} ${sortAndSearchCriteria.sortOrder}
166-
</if>
167-
<!-- Currently don't support Pagenation at the db layer
168-
<if test="sortAndSearchCriteria.isPaginated()">
169-
LIMIT #{sortAndSearchCriteria.pageSize} OFFSET #{sortAndSearchCriteria.offset}
170-
</if>
171-
-->
164+
<!-- Ordering: an explicit sort request is used when present; otherwise, when a search term is
     present, results are ordered by study name ascending. With neither, no ORDER BY is emitted. -->
<!-- NOTE(review): sortField/sortOrder are written with ${...} (text substitution) rather than
     #{...}; presumably they are validated upstream - confirm they cannot carry user-supplied SQL. -->
<!-- NOTE(review): the search filter in this statement refers to cs.name while the default
     ordering below refers to cancer_study.name - confirm both resolve to the same column. -->
<choose>
165+
<when test="sortAndSearchCriteria.isSortable()">
166+
ORDER BY ${sortAndSearchCriteria.sortField} ${sortAndSearchCriteria.sortOrder}
167+
</when>
168+
<when test="sortAndSearchCriteria.isSearchable()">
169+
ORDER BY cancer_study.name ASC
170+
</when>
171+
</choose>
172172
</select>
173173

174174
<resultMap id="CancerStudyMetadataResultMap"

src/main/resources/org/cbioportal/legacy/persistence/mybatis/GeneMapper.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
</if>
2727
<if test="keyword != null">
2828
<foreach item="item" collection="keyword.split(' ')" open="(" separator=") AND (" close=")">
29-
gene.hugo_gene_symbol ilike CONCAT('%', #{item}, '%')
29+
gene.hugo_gene_symbol ilike CONCAT(#{item}, '%')
3030
</foreach>
3131
</if>
3232
</where>

src/test/java/org/cbioportal/legacy/persistence/mybatis/GeneMyBatisRepositoryTest.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,54 @@ public void getAllGenesSummaryProjectionHugoGeneSymbolSort() throws Exception {
125125
Assert.assertEquals("SAMD11", result.get(21).getHugoGeneSymbol());
126126
}
127127

128+
@Test
129+
public void getAllGenesWithKeywordSearch() throws Exception {
130+
131+
List<Gene> result =
132+
geneMyBatisRepository.getAllGenes("AKT", null, "SUMMARY", null, null, null, null);
133+
134+
Assert.assertEquals(3, result.size());
135+
Assert.assertEquals("AKT1", result.get(0).getHugoGeneSymbol());
136+
Assert.assertEquals("AKT2", result.get(1).getHugoGeneSymbol());
137+
Assert.assertEquals("AKT3", result.get(2).getHugoGeneSymbol());
138+
}
139+
140+
@Test
141+
public void getAllGenesWithKeywordSearchCaseInsensitive() throws Exception {
142+
143+
List<Gene> result =
144+
geneMyBatisRepository.getAllGenes("akt", null, "SUMMARY", null, null, null, null);
145+
146+
Assert.assertEquals(3, result.size());
147+
Assert.assertEquals("AKT1", result.get(0).getHugoGeneSymbol());
148+
Assert.assertEquals("AKT2", result.get(1).getHugoGeneSymbol());
149+
Assert.assertEquals("AKT3", result.get(2).getHugoGeneSymbol());
150+
}
151+
152+
@Test
153+
public void getAllGenesWithKeywordSearchAlphabeticalOrder() throws Exception {
154+
155+
List<Gene> result =
156+
geneMyBatisRepository.getAllGenes("A", null, "SUMMARY", null, null, null, null);
157+
158+
// Verify results are returned in alphabetical order by hugo gene symbol
159+
Assert.assertTrue(result.size() > 0);
160+
for (int i = 0; i < result.size() - 1; i++) {
161+
String current = result.get(i).getHugoGeneSymbol();
162+
String next = result.get(i + 1).getHugoGeneSymbol();
163+
Assert.assertTrue(
164+
"Expected " + current + " to come before " + next + " alphabetically",
165+
current.compareTo(next) <= 0);
166+
}
167+
// Verify specific ordering of genes starting with 'A'
168+
Assert.assertEquals("AKT1", result.get(0).getHugoGeneSymbol());
169+
Assert.assertEquals("AKT2", result.get(1).getHugoGeneSymbol());
170+
Assert.assertEquals("AKT3", result.get(2).getHugoGeneSymbol());
171+
Assert.assertEquals("ALK", result.get(3).getHugoGeneSymbol());
172+
Assert.assertEquals("ARAF", result.get(4).getHugoGeneSymbol());
173+
Assert.assertEquals("ATM", result.get(5).getHugoGeneSymbol());
174+
}
175+
128176
@Test
129177
public void getMetaGenes() throws Exception {
130178

0 commit comments

Comments
 (0)