Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e49d05c
Relax constraints for setting AnnData cell count
bistline Jun 4, 2025
0c1cd99
Adding conversion for numeric facets/metadata
bistline Jun 5, 2025
12c20c0
Merge pull request #2271 from broadinstitute/jb-anndata-cell-count
bistline Jun 5, 2025
5eef79e
Removing all BQ references
bistline Jun 5, 2025
2d97db0
working on test regressions
bistline Jun 9, 2025
0e3d87b
working on controller test regressions
bistline Jun 9, 2025
cf7820d
removing orphaned cleanup call
bistline Jun 9, 2025
6903391
fixing final regressions
bistline Jun 9, 2025
229abdf
removing bq gem
bistline Jun 9, 2025
b1ee7f9
adding missing migration
bistline Jun 9, 2025
eb1bec2
add missing minmax setter to migration
bistline Jun 9, 2025
61c3c38
Ensure best available annotation is valid for default cluster
bistline Jun 10, 2025
2558dfb
Simplify logic
bistline Jun 10, 2025
d093722
Further simplifying logic
bistline Jun 10, 2025
a6f7c8c
First pass at optional HCA results
bistline Jun 10, 2025
3d31854
Fixing command line
bistline Jun 11, 2025
a7e16fb
Finishing styling, functionality
bistline Jun 11, 2025
c77d64e
removing debug
bistline Jun 11, 2025
b8363ed
refactoring to nest inside "More facets"
bistline Jun 11, 2025
7fd7e49
Better refactoring
bistline Jun 11, 2025
73378a7
adding unit tests
bistline Jun 11, 2025
bc073c2
updating to ingest 1.42.0
bistline Jun 12, 2025
36c5588
Merge pull request #2273 from broadinstitute/jb-default-cluster-annot…
bistline Jun 12, 2025
7fb3b39
Moving into separate top-level component for future flexibility
bistline Jun 12, 2025
cf82c1d
Merge pull request #2272 from broadinstitute/jb-search-bigquery-decouple
bistline Jun 12, 2025
027db32
reverting blankspace edits
bistline Jun 12, 2025
82bf451
Better sorting/uniquifying of facet filters
bistline Jun 16, 2025
4a33df0
Merge pull request #2274 from broadinstitute/jb-hca-result-filter
bistline Jun 16, 2025
d3a3b74
Handle nil filter values
bistline Jun 16, 2025
f72cca3
Merge pull request #2275 from broadinstitute/jb-search-filter-order
bistline Jun 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ gem 'omniauth-google-oauth2'
gem 'omniauth-rails_csrf_protection'
gem 'googleauth'
gem 'google-cloud-storage', require: 'google/cloud/storage'
gem 'google-cloud-bigquery', require: 'google/cloud/bigquery'
gem 'google-apis-lifesciences_v2beta', require: 'google/apis/lifesciences_v2beta'
gem 'google-apis-batch_v1', require: 'google/apis/batch_v1'
gem 'bootstrap-sass', :git => 'https://github.com/twbs/bootstrap-sass'
Expand Down
9 changes: 0 additions & 9 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ GEM
activesupport (>= 6.1)
google-apis-batch_v1 (0.27.0)
google-apis-core (>= 0.11.0, < 2.a)
google-apis-bigquery_v2 (0.54.0)
google-apis-core (>= 0.11.0, < 2.a)
google-apis-core (0.11.0)
addressable (~> 2.5, >= 2.5.1)
googleauth (>= 0.16.2, < 2.a)
Expand All @@ -210,12 +208,6 @@ GEM
google-apis-core (>= 0.11.0, < 2.a)
google-apis-storage_v1 (0.19.0)
google-apis-core (>= 0.9.0, < 2.a)
google-cloud-bigquery (1.43.1)
concurrent-ruby (~> 1.0)
google-apis-bigquery_v2 (~> 0.1)
google-cloud-core (~> 1.6)
googleauth (>= 0.16.2, < 2.a)
mini_mime (~> 1.0)
google-cloud-core (1.6.0)
google-cloud-env (~> 1.0)
google-cloud-errors (~> 1.0)
Expand Down Expand Up @@ -587,7 +579,6 @@ DEPENDENCIES
gibberish
google-apis-batch_v1
google-apis-lifesciences_v2beta
google-cloud-bigquery
google-cloud-storage
googleauth
irb
Expand Down
166 changes: 24 additions & 142 deletions app/controllers/api/v1/search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ class SearchController < ApiBaseController
key :required, false
key :type, :string
end
parameter do
key :name, :external
key :in, :query
key :description, 'External search services to query in addition to SCP'
key :required, false
key :type, :string
key :enum, %w[hca]
end
parameter do
key :name, :page
key :in, :query
Expand Down Expand Up @@ -238,20 +246,7 @@ def index
if @studies.count > 0 && @facets.any?
sort_type = :facet
@studies_by_facet = {}
mongo_facets, bq_facets = self.class.divide_facets_by_source(@facets)
if bq_facets.any?
@big_query_search = self.class.generate_bq_query_string(bq_facets)
query_results = ApplicationController.big_query_client.dataset(CellMetadatum::BIGQUERY_DATASET).query @big_query_search
else
query_results = []
end
# run a query for any mongo-based facets
mongo_facets.map do |facet|
db_facet = facet[:db_facet]
mongo_results = StudySearchService.perform_mongo_facet_search(db_facet, facet[:filters])
query_results += mongo_results
end

query_results = StudySearchService.perform_mongo_facet_search(@facets)
# build up map of study matches by facet & filter value (for adding labels in UI)
@studies_by_facet = self.class.match_studies_by_facet(query_results, @facets)
# uniquify result list as one study may match multiple facets/filters
Expand Down Expand Up @@ -282,10 +277,11 @@ def index
# convert to array to allow appending external search results (Azul, TDR, etc.)
@studies = @studies.to_a

# perform Azul search if there are facets/terms provided by user
# perform Azul search if there are facets/terms provided by user, and they requested HCA results
# run this before inferred search so that they are weighted and sorted correctly
# skip if user is searching inside a collection or they are performing global gene search
if (@facets.present? || @term_list.present?) && (@selected_branding_group.nil? && @search_type == :study)
include_azul = params[:external] == 'hca' && @search_type == :study && @selected_branding_group.nil?
if (@facets.present? || @term_list.present?) && include_azul
begin
azul_results = ::AzulSearchService.append_results_to_studies(@studies,
selected_facets: @facets,
Expand All @@ -312,10 +308,10 @@ def index
@studies = @studies.sort_by do |study|
if study.is_a? Study
# combine text hits with metadata match totals to get real weight
metadata_weight = @metadata_matches.dig(study.accession, :facet_search_weight).to_i
-(study.search_weight(@term_list)[:total] + metadata_weight)
metadata_weight = @metadata_matches&.dig(study.accession, :facet_search_weight).to_i || 0
[-(study.search_weight(@term_list)[:total] + metadata_weight), 0]
else
-study[:term_search_weight]
[-study[:term_search_weight], 1] # external studies are always weighted lower
end
end
when :accession
Expand All @@ -333,9 +329,11 @@ def index
when :facet
@studies = @studies.sort_by do |study|
accession = self.class.get_study_attribute(study, :accession)
metadata_weight = @metadata_matches.present? ?
@metadata_matches.dig(accession, :facet_search_weight).to_i : 0
-(@studies_by_facet[accession][:facet_search_weight] + metadata_weight)
metadata_weight = @metadata_matches&.dig(accession, :facet_search_weight)&.to_i || 0
[
-(@studies_by_facet[accession][:facet_search_weight] + metadata_weight), # sort by facet weight
study.is_a?(Study) ? 0 : 1 # prioritize SCP results over external
]
end
when :recent
@studies = @studies.sort_by { |study| self.class.get_study_attribute(study, :created_at) }.reverse
Expand Down Expand Up @@ -576,120 +574,6 @@ def self.promote_exact_match(search_string, studies, match_data)
[reordered, match_data]
end

# generate query string for BQ
# array-based columns need to set up data in WITH clauses to allow for a single UNNEST(column_name) call,
# otherwise UNNEST() is called multiple times for each user-supplied filter value and could impact performance
# Assemble one SQL string of the form
#   [WITH ...] SELECT DISTINCT study_accession[, ...] FROM <table>[, ...] WHERE <clause> AND <clause> ...
# from the per-facet fragments returned by get_query_elements_for_facet.
#
# facets: enumerable of facet hashes; this method reads facet[:id] and hands each hash, unchanged,
#         to get_query_elements_for_facet
# returns: String containing the full query
#
# NOTE(review): with an empty facets list the result ends in a dangling " WHERE "; callers are
# presumably expected to guard against that -- confirm
def self.generate_bq_query_string(facets)
base_query = "SELECT DISTINCT study_accession"
from_clause = " FROM #{CellMetadatum::BIGQUERY_TABLE}"
where_clauses = []
with_clauses = []
# pairs of facet ids whose WHERE clauses are combined with OR instead of AND: [leading, trailing]
or_facets = [['cell_type', 'cell_type__custom'], ['organ', 'organ_region']]
leading_or_facets = or_facets.map(&:first)
trailing_or_facets = or_facets.map(&:last)
# holds the half-built "(<leading where> OR " string while waiting for the trailing facet of a pair
or_grouped_where_clause = nil
# sort the facets so that OR'ed facets will be next to each other, the 99 is just
# an arbitrary large-ish number to make sure the non-or-grouped facets are sorted together
sorted_facets = facets.sort_by {|facet| or_facets.flatten.find_index(facet[:id]) || 99}
sorted_facets.each_with_index do |facet_obj, index|
query_elements = get_query_elements_for_facet(facet_obj)
# SELECT/FROM/WITH fragments are optional per facet and are accumulated regardless of OR grouping
from_clause += ", #{query_elements[:from]}" if query_elements[:from]
base_query += ", #{query_elements[:select]}" if query_elements[:select]
with_clauses << query_elements[:with] if query_elements[:with]
# index of the OR pair this facet leads, or nil if it is not a leading facet
or_group_index = leading_or_facets.find_index(facet_obj[:id])
# id of the facet that follows in sorted order; nil when this is the last facet
next_facet_id = sorted_facets[index + 1].try(:[], :id)

# this block handles 3 cases: (1) regular AND (2) leading OR (3) trailing OR
if or_group_index && trailing_or_facets[or_group_index] == next_facet_id
# we're at the start of a pair of facets that should be grouped by OR
or_grouped_where_clause = "(#{query_elements[:where]} OR "
else
if or_grouped_where_clause
# we're on the second of a pair of facets that should be grouped by OR
where_clauses << or_grouped_where_clause + "#{query_elements[:where]})"
or_grouped_where_clause = nil
else
# we're on a regular AND facet
where_clauses << query_elements[:where]
end
end
end
# prepend WITH clauses before base_query (if needed), then add FROM and dependent WHERE clauses
# all facets are treated as AND clauses
with_statement = with_clauses.any? ? "WITH #{with_clauses.join(", ")} " : ""
with_statement + base_query + from_clause + " WHERE " + where_clauses.join(" AND ")
end

# Build the SQL fragments contributed by a single facet.
#
# facet_obj: hash with
#   :db_facet - facet object; this method calls big_query_id_column, is_array_based?, is_numeric?,
#               is_ontology_based?, big_query_name_column, identifier, must_convert?,
#               big_query_conversion_column and calculate_time_in_seconds on it
#   :filters  - for array-based and plain facets, a list of hashes read via [:id] and [:name];
#               for numeric facets, a hash read via [:min], [:max] and [:unit]
# returns: hash with keys :where, :with, :from, :select, :display_where; a key stays nil when the
#          branch taken does not set it (:display_where is never set in this method)
def self.get_query_elements_for_facet(facet_obj)
query_elements = {
where: nil,
with: nil,
from: nil,
select: nil,
display_where: nil
}
# get the facet instance in order to run query
search_facet = facet_obj[:db_facet]
column_name = search_facet.big_query_id_column
if search_facet.is_array_based?
# if facet is array-based, we need to format an array of filter values selected by user
# and add this as a WITH clause, then add two UNNEST() calls for both the BQ array column
# and the user filters to optimize the query
# example query:
# WITH disease_filters AS (SELECT['MONDO_0000001', 'MONDO_0006052'] as disease_value)
# FROM cell_metadata.alexandria_convention, disease_filters, UNNEST(disease_filters.disease_value) AS disease_val
# WHERE (disease_val IN UNNEST(disease))
facet_id = search_facet.identifier
filter_arr_name = "#{facet_id}_filters"
filter_val_name = "#{facet_id}_value"
filter_where_val = "#{facet_id}_val"
filter_values = facet_obj[:filters].map { |filter| sanitize_filter_value(filter[:id]) }
# the Ruby array is interpolated directly, so the SQL array literal is whatever Array#to_s produces
query_elements[:with] = "#{filter_arr_name} AS (SELECT#{filter_values} as #{filter_val_name})"
query_elements[:from] = "#{filter_arr_name}, UNNEST(#{filter_arr_name}.#{filter_val_name}) AS #{filter_where_val}"
query_elements[:where] = "(#{filter_where_val} IN UNNEST(#{column_name}))"
query_elements[:select] = "#{filter_where_val}"
# to maximize XDSS queries, also check __ontology_label columns since Azul doesn't support IDs
if search_facet.is_ontology_based? && search_facet.big_query_name_column.present?
# NOTE(review): label values are not passed through sanitize_filter_value here, unlike the
# non-array branch below -- confirm whether that is intentional
label_values = facet_obj[:filters].map { |filter| filter[:name] }
label_column = search_facet.big_query_name_column
label_filter_arr_name = "#{facet_id}_label_filters"
label_filter_val_name = "#{facet_id}_label_value"
label_filter_where_val = "#{facet_id}_label_val"
query_elements[:with] += ", #{label_filter_arr_name} AS (SELECT#{label_values} as #{label_filter_val_name})"
query_elements[:from] += ", #{label_filter_arr_name}, UNNEST(#{label_filter_arr_name}.#{label_filter_val_name}) AS #{label_filter_where_val}"
# reconstitute where clause to use OR to match on either ID or label
query_elements[:where] = "((#{filter_where_val} IN UNNEST(#{column_name})) OR (#{label_filter_where_val} IN UNNEST(#{label_column})))"
query_elements[:select] += ", #{label_filter_where_val}"
end
elsif search_facet.is_numeric?
# run a range query (e.g. WHERE organism_age BETWEEN 20 and 60)
query_elements[:select] = "#{column_name}"
query_on = column_name
min_value = facet_obj[:filters][:min]
max_value = facet_obj[:filters][:max]
unit = facet_obj[:filters][:unit]
# when the facet requires conversion, query the conversion column using values expressed in seconds
if search_facet.must_convert?
query_on = search_facet.big_query_conversion_column
min_value = search_facet.calculate_time_in_seconds(base_value: min_value, unit_label: unit)
max_value = search_facet.calculate_time_in_seconds(base_value: max_value, unit_label: unit)
end
# NOTE(review): min/max are interpolated as-is; presumably validated as numeric upstream -- confirm
query_elements[:where] = "#{query_on} BETWEEN #{min_value} AND #{max_value}"
else
query_elements[:select] = "#{column_name}"
# for non-array columns we can pass an array of quoted values and call IN directly
filter_values = facet_obj[:filters].map { |filter| sanitize_filter_value(filter[:id]) }
main_query = "#{column_name} IN ('#{filter_values.join('\',\'')}')"
query_elements[:where] = main_query
# to maximize XDSS queries, also check __ontology_label columns since Azul doesn't support IDs
if search_facet.is_ontology_based? && search_facet.big_query_name_column.present?
label_values = facet_obj[:filters].map { |filter| sanitize_filter_value(filter[:name]) }
extra_query = "#{search_facet.big_query_name_column} IN ('#{label_values.join('\',\'')}')"
query_elements[:where] = "(#{main_query} OR #{extra_query})"
end
end
query_elements
end

# convert a list of facet filters into a keyword search for inferred matching
# treats each facet separately so we can find intersection across all
def self.convert_filters_for_inferred_search(facets:)
Expand Down Expand Up @@ -771,15 +655,13 @@ def self.match_results_by_filter(search_result:, result_key:, facets:)
match.delete(:name)
match
else
matching_facet[:filters].detect { |filter| filter[:id] == search_result[result_key] || filter[:name] == search_result[result_key]}
matching_facet[:filters].detect do |filter|
filters = search_result[result_key].is_a?(Array) ? search_result[result_key] : [search_result[result_key]]
filters.include?(filter[:id]) || filters.include?(filter[:name])
end
end
end

# divide facets into mongo- and bigquery-based
# Split facets into two lists based on the truthiness of facet[:db_facet].is_mongo_based.
#
# facets: list of facet hashes, each carrying a :db_facet object
# returns: two-element array [facets flagged as mongo-based, all remaining facets], each in input order
def self.divide_facets_by_source(facets)
  mongo_backed = []
  remaining = []
  facets.each do |facet|
    bucket = facet[:db_facet].is_mongo_based ? mongo_backed : remaining
    bucket << facet
  end
  [mongo_backed, remaining]
end

# properly escape any single quotes in a filter value (double quotes are correctly handled already)
# filter: String filter value
# returns: new String in which every single quote is preceded by a backslash; other characters are untouched
def self.sanitize_filter_value(filter)
  escaped_quote = "\\'"
  # block form is used so the replacement is inserted literally, with no backreference expansion
  filter.gsub("'") { escaped_quote }
end
4 changes: 0 additions & 4 deletions app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ def self.batch_api_client
@@batch_api_client ||= BatchApiClient.new
end

# getter for the shared BigQuery client; built from BigQueryClient.new.client on first use and reused afterwards
def self.big_query_client
  unless defined?(@@big_query_client) && @@big_query_client
    @@big_query_client = BigQueryClient.new.client
  end
  @@big_query_client
end

# getter for FireCloudClient instance
def self.firecloud_client
@@firecloud_client ||= FireCloudClient.new
Expand Down
2 changes: 2 additions & 0 deletions app/javascript/components/search/controls/FacetsPanel.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import FacetControl from './FacetControl'
import CombinedFacetControl from './CombinedFacetControl'
import MoreFacetsButton from './MoreFacetsButton'
import { SearchFacetContext } from '~/providers/SearchFacetProvider'
import OptionsButton from '~/components/search/controls/OptionsButton'

const defaultFacetIds = ['disease', 'species']
const moreFacetIds = [
Expand Down Expand Up @@ -71,6 +72,7 @@ export default function FacetsPanel() {
}
<CombinedFacetControl controlDisplayName="cell type" facetIds={['cell_type', 'cell_type__custom']}/>
<MoreFacetsButton facets={moreFacets} />
<OptionsButton />
<Modal
show={showSearchHelpModal}
onHide={() => closeModal(setShowSearchHelpModal)}
Expand Down
41 changes: 41 additions & 0 deletions app/javascript/components/search/controls/OptionsButton.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import React, { useState, useContext } from 'react'
import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
import { faCogs } from '@fortawesome/free-solid-svg-icons'
import { Popover, OverlayTrigger } from 'react-bootstrap'
import { StudySearchContext } from '~/providers/StudySearchProvider'

import OptionsControl from '~/components/search/controls/OptionsControl'

export default function OptionsButton() {
const searchContext = useContext(StudySearchContext)
const [showOptions, setShowOptions] = useState(false)
const configuredOptions = [
{ searchProp: 'external', value: 'hca', label: 'Include HCA results' }
]

const optionsPopover = <Popover data-analytics-name='search-options-menu' id='search-options-menu'>
<ul className="facet-filter-list">
{
configuredOptions.map((option) => {
return <OptionsControl
key={option.searchProp}
searchContext={searchContext}
searchProp={option.searchProp}
value={option.value}
label={option.label}/>
})
}
</ul>
</Popover>

return (
<OverlayTrigger trigger={['click']} placement='bottom' animation={false} overlay={optionsPopover}>
<span id="search-options-button" data-testid="search-options-button"
className={`facet ${showOptions ? 'active' : ''}`}>
<a onClick={() => setShowOptions(!showOptions)}>
<FontAwesomeIcon className="icon-left" icon={faCogs}/>Options
</a>
</span>
</OverlayTrigger>
)
}
21 changes: 21 additions & 0 deletions app/javascript/components/search/controls/OptionsControl.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import React, { useState } from 'react'

export default function OptionsControl({searchContext, searchProp, value, label}) {
const defaultChecked = searchContext.params[searchProp] === value
const [isChecked, setIsChecked] = useState(defaultChecked)

/** toggle state of checkbox */
function toggleCheckbox(checked) {
setIsChecked(checked)
searchContext.updateSearch({ [searchProp] : checked ? value : null })
}

return (
<li id={`options-control-${searchProp}`} key={`options-control-${searchProp}`}>
<label>
<input type="checkbox" checked={isChecked} onChange={() => {toggleCheckbox(!isChecked)}}/>
<span onClick={() => {toggleCheckbox(!isChecked)}} >{ label }</span>
</label>
</li>
)
}
2 changes: 1 addition & 1 deletion app/javascript/lib/scp-api.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ export async function fetchSearch(type, searchParams, mock=false) {
export function buildSearchQueryString(type, searchParams) {
const facetsParam = buildFacetQueryString(searchParams.facets)

const params = ['page', 'order', 'terms', 'preset', 'genes', 'genePage']
const params = ['page', 'order', 'terms', 'external', 'preset', 'genes', 'genePage']
let otherParamString = params.map(param => {
return searchParams[param] ? `&${param}=${searchParams[param]}` : ''
}).join('')
Expand Down
1 change: 1 addition & 0 deletions app/javascript/providers/GeneSearchProvider.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ export function buildParamsFromQuery(query, preset) {
page: queryParams.genePage ? parseInt(queryParams.genePage) : 1,
genes: cleanGeneParams,
terms: queryParams.terms ? queryParams.terms : '',
external: queryParams.external ? queryParams.external : '',
facets: buildFacetsFromQueryString(queryParams.facets),
preset: preset ? preset : queryString.preset_search
}
Expand Down
2 changes: 1 addition & 1 deletion app/javascript/providers/SearchSelectionProvider.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export default function SearchSelectionProvider(props) {
const [selection, setSelection] = useState(
appliedSelection ?
appliedSelection :
{ terms: '', facets: {} })
{ terms: '', facets: {}, external: '' })
selection.updateSelection = updateSelection
selection.updateFacet = updateFacet
selection.performSearch = performSearch
Expand Down
Loading
Loading