Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions app/controllers/api/v1/visualization/expression_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ExpressionController < ApiBaseController
key :description, 'Type of plot data requested'
key :required, true
key :type, :string
key :enum, %w(violin heatmap morpheus)
key :enum, %w(violin heatmap morpheus dotplot)
end
parameter do
key :name, :cluster
Expand Down Expand Up @@ -117,6 +117,8 @@ def show
render_heatmap
when 'morpheus'
render_morpheus_json
when 'dotplot'
render_dotplot
else
render json: { error: "Unknown expression data type: #{data_type}" }, status: :bad_request
end
Expand Down Expand Up @@ -174,12 +176,23 @@ def render_morpheus_json
render json: expression_data, status: :ok
end

# Render precomputed dot plot data as JSON for the loaded study, cluster, annotation and genes.
# Responds with a 404 JSON error when no cluster has been loaded for the request.
def render_dotplot
  if @cluster.nil?
    return render(json: { error: 'Requested cluster not found' }, status: :not_found)
  end

  dot_plot_data = ExpressionVizService.load_precomputed_dot_plot_data(
    @study, @cluster, annotation: @annotation, genes: @genes
  )
  render json: dot_plot_data, status: :ok
end

private

# enforce a limit on number of genes allowed for visualization requests
# see StudySearchService::MAX_GENE_SEARCH
def check_gene_limit
return true if params[:genes].blank?
return true if params[:genes].blank? || params[:data_type] == 'dotplot'

# render 422 if more than MAX_GENE_SEARCH as request fails internal validation
num_genes = params[:genes].split(',').size
Expand All @@ -194,7 +207,11 @@ def set_cluster
end

# Populate @genes from the comma-delimited +genes+ request parameter.
#
# Dot plot requests bypass the RequestUtils lookup and use the submitted names directly
# (whitespace trimmed, blank entries dropped); every other data type goes through
# RequestUtils.get_genes_from_param.
def set_genes
  @genes = if params[:data_type] == 'dotplot'
             # check_gene_limit lets blank gene lists through, so coerce nil to '' before splitting
             params[:genes].to_s.split(',').map(&:strip).reject(&:empty?)
           else
             RequestUtils.get_genes_from_param(@study, params[:genes])
           end
end

def set_selected_annotation
Expand Down
12 changes: 11 additions & 1 deletion app/javascript/components/search/results/ResultsPanel.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ import LoadingSpinner from '~/lib/LoadingSpinner'
*/
const ResultsPanel = ({ studySearchState, studyComponent, noResultsDisplay, bookmarks }) => {
const results = studySearchState.results
const hcaMessage = <a
className='hca-link'
onClick={() => studySearchState.updateSearch({ external: 'hca' })}
data-analytics-event='search-hca-empty-results'>
Search HCA Data Portal?
</a>

const emptyResultMessage = <div>
No results found. { studySearchState?.params?.external === "" ? hcaMessage : null }
</div>

let panelContent
if (studySearchState.isError) {
Expand Down Expand Up @@ -47,7 +57,7 @@ const ResultsPanel = ({ studySearchState, studyComponent, noResultsDisplay, book
</>
)
} else {
noResultsDisplay = noResultsDisplay ? noResultsDisplay : <div> No results found. </div>
noResultsDisplay = noResultsDisplay ? noResultsDisplay : emptyResultMessage
panelContent = (
<>
<SearchQueryDisplay terms={results.termList} facets={results.facets} />
Expand Down
5 changes: 5 additions & 0 deletions app/javascript/styles/_resultsPanel.scss
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,8 @@
min-width: 120px;
height: 20px;
}

// Styles the "Search HCA Data Portal?" anchor rendered by ResultsPanel when a search has no results.
// That anchor is given an onClick handler but no href, which is presumably why the pointer cursor
// is set explicitly here.
.hca-link {
font-weight: bold;
cursor: pointer;
}
107 changes: 107 additions & 0 deletions app/lib/dot_plot_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# frozen_string_literal: true

# service that handles preprocessing expression/annotation data to speed up dot plot rendering
class DotPlotService
  # number of documents written per bulk insert when seeding test data
  SEED_BATCH_SIZE = 1000

  # main handler for launching ingest job to process expression data
  # NOTE: currently a stub that only reports study eligibility; no job is created or launched yet
  #
  # * *params*
  #   - +study+ (Study) => the study that owns the data
  #   - +cluster_group+ (ClusterGroup) => the cluster to source cell names from
  #   - +annotation_file+ (StudyFile) => the StudyFile containing annotation data
  #   - +expression_file+ (StudyFile) => the StudyFile to source data from
  #
  # * *returns*
  #   - (Boolean) => result of study_eligible? until the scp-ingest-pipeline implementation is available
  def self.run_preprocess_expression_job(study, cluster_group, annotation_file, expression_file)
    study_eligible?(study) # method stub, waiting for scp-ingest-pipeline implementation
  end

  # determine study eligibility - can only have one processed matrix and be able to visualize clusters
  #
  # * *params*
  #   - +study+ (Study) the study that owns the data
  #
  # * *returns*
  #   - (Boolean) true if the study is eligible for dot plot visualization
  def self.study_eligible?(study)
    study.can_visualize_clusters? && study.has_expression_data? && study_processed_matrices(study).size == 1
  end

  # check if the given study/cluster has already been preprocessed
  #
  # * *params*
  #   - +study+ (Study) the study that owns the data
  #   - +cluster_group+ (ClusterGroup) the cluster to check for processed data
  #
  # * *returns*
  #   - (Boolean) true if the study/cluster has already been processed
  def self.cluster_processed?(study, cluster_group)
    DotPlotGene.where(study:, cluster_group:).exists?
  end

  # get processed expression matrices for a study
  # a matrix counts as processed if it is a viz AnnData file or is not a raw counts file
  #
  # * *params*
  #   - +study+ (Study) the study to get matrices for
  #
  # * *returns*
  #   - (Array<StudyFile>) an array of processed expression matrices for the study
  def self.study_processed_matrices(study)
    study.expression_matrices.select do |matrix|
      matrix.is_viz_anndata? || !matrix.is_raw_counts_file?
    end
  end

  # seeding method for testing purposes, will be removed once pipeline is in place
  # data is random and not representative of actual expression data
  # any existing DotPlotGene documents for the study are deleted first
  #
  # * *params*
  #   - +study+ (Study) the study to seed data for
  #
  # * *returns*
  #   - (Boolean) true if seeding completed, false if the study is ineligible or an error was raised
  def self.seed_dot_plot_genes(study)
    return false unless study_eligible?(study)

    DotPlotGene.where(study_id: study.id).delete_all
    puts "Seeding dot plot genes for #{study.accession}"
    # eligibility guarantees exactly one processed matrix, whereas expression_matrices.first
    # is not filtered and could be a raw counts file
    expression_matrix = study_processed_matrices(study).first
    print 'assembling genes and annotations...'
    genes = Gene.where(study:, study_file: expression_matrix).pluck(:name)
    annotations = AnnotationVizService.available_metadata_annotations(
      study, annotation_type: 'group'
    ).reject { |a| a[:scope] == 'invalid' }
    puts " done. Found #{genes.size} genes and #{annotations.size} study-wide annotations."
    study.cluster_groups.each do |cluster_group|
      next if cluster_processed?(study, cluster_group)

      seed_cluster(study, cluster_group, expression_matrix, genes, annotations)
    end
    puts "Seeding complete for #{study.accession}, #{DotPlotGene.where(study_id: study.id).size} DotPlotGenes created."
    true
  rescue StandardError => e
    puts "Error seeding DotPlotGenes in #{study.accession}: #{e.message}"
    false
  end

  # insert random DotPlotGene documents for every gene in a single cluster, in batches of SEED_BATCH_SIZE
  def self.seed_cluster(study, cluster_group, expression_matrix, genes, study_annotations)
    cluster_annotations = ClusterVizService.available_annotations_by_cluster(
      cluster_group, 'group'
    ).reject { |a| a[:scope] == 'invalid' }
    all_annotations = study_annotations + cluster_annotations
    puts "Processing #{cluster_group.name} with #{all_annotations.size} annotations."
    # each_slice never yields an empty batch, so insert_many is never called without documents
    genes.each_slice(SEED_BATCH_SIZE) do |gene_batch|
      documents = gene_batch.map do |gene|
        DotPlotGene.new(
          study:, study_file: expression_matrix, cluster_group:, gene_symbol: gene, searchable_gene: gene.downcase,
          exp_scores: random_exp_scores(all_annotations)
        ).attributes
      end
      DotPlotGene.collection.insert_many(documents)
      count = DotPlotGene.where(study_id: study.id, cluster_group_id: cluster_group.id).size
      puts "Inserted #{count}/#{genes.size} DotPlotGenes for #{cluster_group.name}."
    end
    puts "Finished processing #{cluster_group.name}"
  end

  # build a hash of "name--type--scope" => { annotation value => [random, random] } for the given annotations
  def self.random_exp_scores(annotations)
    annotations.to_h do |annotation|
      identifier = "#{annotation[:name]}--#{annotation[:type]}--#{annotation[:scope]}"
      [identifier, annotation[:values].to_h { |value| [value, [rand.round(3), rand.round(3)]] }]
    end
  end

  private_class_method :seed_cluster, :random_exp_scores
end
12 changes: 12 additions & 0 deletions app/lib/expression_viz_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -472,4 +472,16 @@ def self.get_column_metadata(cells, annotation_name, annotations)
]
}
end

# load precomputed dot plot data for a given study, cluster, annotation and gene set
#
# * *params*
#   - +study+ (Study) => the study that owns the data
#   - +cluster_group+ (ClusterGroup) => the cluster the scores were computed for
#   - +annotation+ (Hash) => annotation to load scores for; the :name, :scope and :values keys are read
#   - +genes+ (Array<String>) => gene names to load, matched case-insensitively via searchable_gene
#
# * *returns*
#   - (Hash) => { annotation_name:, values:, genes: { gene_symbol => scores for each annotation value } }
def self.load_precomputed_dot_plot_data(study, cluster_group, annotation: {}, genes: [])
  # tolerate explicit nils so a missing annotation/gene list yields an empty payload rather than an exception
  annotation ||= {}
  name = annotation[:name]
  scope = annotation[:scope]
  values = annotation[:values]
  searchable_genes = Array(genes).map(&:downcase).uniq
  dot_plot_genes = DotPlotGene.where(study:, cluster_group:, :searchable_gene.in => searchable_genes)
  scores_by_gene = dot_plot_genes.each_with_object({}) do |gene, scores|
    scores[gene.gene_symbol] = gene.scores_by_annotation(name, scope, Array(values))
  end
  { annotation_name: name, values:, genes: scores_by_gene }
end
end
13 changes: 13 additions & 0 deletions app/models/delete_queue_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ def perform
# now remove all child objects first to free them up to be re-used.
case file_type
when 'Cluster'
cluster_group = ClusterGroup.find_by(study:, study_file_id: object.id)
delete_differential_expression_results(study:, study_file: object)
delete_parsed_data(object.id, study.id, ClusterGroup, DataArray)
delete_dot_plot_data(study.id, query: { cluster_group_id: cluster_group&.id })
delete_user_annotations(study:, study_file: object)
reset_default_cluster(study:)
reset_default_annotation(study:)
Expand All @@ -45,6 +47,7 @@ def perform
remove_file_from_bundle
when 'Expression Matrix'
delete_parsed_data(object.id, study.id, Gene, DataArray)
delete_dot_plot_data(study.id, query: { study_file_id: object.id })
delete_differential_expression_results(study:, study_file: object)
study.set_gene_count
when 'MM Coordinate Matrix'
Expand Down Expand Up @@ -73,6 +76,7 @@ def perform
end
delete_differential_expression_results(study:, study_file: object)
delete_parsed_data(object.id, study.id, CellMetadatum, DataArray)
delete_dot_plot_data(study.id)
delete_cell_index_arrays(study)
study.update(cell_count: 0)
reset_default_annotation(study:)
Expand All @@ -81,6 +85,7 @@ def perform
# delete user annotations first as we lose associations later
delete_user_annotations(study:, study_file: object)
delete_parsed_data(object.id, study.id, ClusterGroup, CellMetadatum, Gene, DataArray)
delete_dot_plot_data(study.id)
delete_fragment_files(study:, study_file: object)
delete_differential_expression_results(study:, study_file: object)
# reset default options/counts
Expand Down Expand Up @@ -338,4 +343,12 @@ def delete_parsed_anndata_entries(study_file_id, study_id, fragment)
data_arrays.delete_all
end
end

# delete preprocessed dot plot data for a study, optionally narrowed by extra query conditions
# with no query (e.g. a processed expression/metadata file was deleted), all entries for the study are removed
# with a query (e.g. a cluster file was deleted), only entries that also match the query are removed
def delete_dot_plot_data(study_id, query: nil)
  criteria = { study_id: }
  criteria.merge!(query) unless query.blank?
  DotPlotGene.where(criteria).delete_all
end
end
32 changes: 32 additions & 0 deletions app/models/dot_plot_gene.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Precomputed dot plot scores for one gene within a study/cluster.
#
# +exp_scores+ is keyed by annotation identifier ("name--group--scope"); each entry maps an annotation
# value to a two-element numeric array (see scores_by_annotation for the [0.0, 0.0] fallback).
# NOTE(review): the meaning of the two numbers is not visible here - confirm against the ingest pipeline.
class DotPlotGene
  include Mongoid::Document
  include Mongoid::Timestamps

  belongs_to :study
  belongs_to :study_file # expression matrix, not clustering file - needed for data cleanup
  belongs_to :cluster_group

  field :gene_symbol, type: String
  field :searchable_gene, type: String # lowercased gene_symbol, set on create
  field :exp_scores, type: Hash, default: {}

  validates :study, :study_file, :cluster_group, presence: true
  validates :gene_symbol, uniqueness: { scope: %i[study study_file cluster_group] }, presence: true

  before_validation :set_searchable_gene, on: :create
  index({ study_id: 1, study_file_id: 1, cluster_group_id: 1 }, { unique: false, background: true })
  index({ study_id: 1, cluster_group_id: 1, searchable_gene: 1 },
        { unique: true, background: true })

  # get scores for a group annotation, ordered to match the requested values
  #
  # * *params*
  #   - +annotation_name+ (String) => name of annotation
  #   - +annotation_scope+ (String) => scope of annotation
  #   - +values+ (Array<String>) => annotation values to return scores for; nil is treated as empty
  #
  # * *returns*
  #   - (Array<Array>) => one entry per value, [0.0, 0.0] where no score is stored
  def scores_by_annotation(annotation_name, annotation_scope, values)
    identifier = "#{annotation_name}--group--#{annotation_scope}"
    scores = exp_scores[identifier] || {}
    Array(values).map { |val| scores[val] || [0.0, 0.0] }
  end

  private

  # runs before validation, so gene_symbol may still be nil here; stay nil-safe and let the
  # presence validation report the problem instead of raising NoMethodError
  def set_searchable_gene
    self.searchable_gene = gene_symbol&.downcase
  end
end
5 changes: 4 additions & 1 deletion app/models/search_facet.rb
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,10 @@ def filters_from_metadatum(metadatum)
minmax.compact.empty? ? [] : [{ MIN: minmax.first.to_f, MAX: minmax.last.to_f }]
else
ids = metadatum.values.map { |i| i.gsub(/:/, '_') } # deal with ontology id format issues
values = CellMetadatum.find_by(study_id: metadatum.study_id, name: big_query_name_column).values
values = CellMetadatum.find_by(study_id: metadatum.study_id, name: big_query_name_column)&.values
# some non-required convention entries like cell_type may not have ontology labels so return if either are blank
return [] if ids.blank? || values.blank?

# deal with array-based annotations
if is_array_based
ids = ids.map { |i| i.split('|') }.flatten
Expand Down
11 changes: 11 additions & 0 deletions db/migrate/20250616192718_create_dot_plot_gene_collection.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Creates the (empty) DotPlotGene collection ahead of any ingest jobs that write to it.
class CreateDotPlotGeneCollection < Mongoid::Migration
  def self.up
    # since these documents will be created by scp-ingest-pipeline, the collection needs to exist first to
    # prevent errors when the job tries to create them
    collection = DotPlotGene.collection
    # creating a collection that already exists can raise a NamespaceExists error, e.g. if documents
    # were written before this migration ran, so only create it when missing
    collection.create unless collection.database.collection_names.include?(collection.name)
  end

  def self.down
    DotPlotGene.collection.drop
  end
end
33 changes: 33 additions & 0 deletions test/api/visualization/expression_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,39 @@ class ExpressionControllerTest < ActionDispatch::IntegrationTest
assert_equal 400, response.status # 400 since study is not visualizable
end

test 'should render precomputed dotplot data' do
  cluster = @basic_study.cluster_groups.first
  # clear any leftover documents so the random scores generated below are the only ones stored
  DotPlotGene.where(study_id: @basic_study.id).delete_all
  # random scores keyed by annotation identifier, then by annotation value
  exp_scores = @basic_study.cell_metadata.map do |metadata|
    [
      metadata.annotation_select_value,
      metadata.values.to_h { |value| [value, [rand.round(3), rand.round(3)]] }
    ]
  end.to_h
  genes = %w[PTEN AGPAT2 PHEX FARSA GAD1 EGFR CLDN4]
  genes.each do |gene_symbol|
    # create! so an invalid fixture fails loudly here rather than as a confusing assertion failure later
    DotPlotGene.create!(
      study: @basic_study,
      study_file: @basic_study_exp_file,
      cluster_group: cluster,
      gene_symbol:,
      exp_scores:
    )
  end
  sign_in_and_update @user
  annotation = @basic_study.cell_metadata.where(annotation_type: 'group').sample
  execute_http_request(:get, api_v1_study_expression_path(@basic_study, 'dotplot', {
    cluster: cluster.name,
    annotation_name: annotation.name,
    annotation_type: 'group',
    annotation_scope: 'study',
    genes: genes.join(',')
  }), user: @user)
  assert_equal 200, response.status
  # every requested gene was stored with the same scores, so check all of them rather than a random one
  expected_scores = exp_scores[annotation.annotation_select_value].values
  genes.each do |gene_symbol|
    assert_equal expected_scores, json.dig('genes', gene_symbol), "unexpected scores for #{gene_symbol}"
  end
end

test 'should query by gene ID' do
gene_id = @basic_study.genes.first.gene_id
sign_in_and_update @user
Expand Down
33 changes: 33 additions & 0 deletions test/js/resultsPanel.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,37 @@ describe('<StudyResultsContainer/> rendering>', () => {
expect(container.getElementsByClassName('error-panel')).toHaveLength(0)
expect(container.getElementsByClassName('results-header')).toHaveLength(1)
})
it('should render message about HCA when no results found', () => {
const studySearchState = {
isError: false,
isLoaded: true,
params: { external: '' },
results: {
studies: [],
facets: {}
}
}
const { container } = render(
<ResultsPanel studySearchState={studySearchState}/>
)
expect(container.getElementsByClassName('loading-panel')).toHaveLength(0)
expect(container.getElementsByClassName('error-panel')).toHaveLength(0)
expect(container.getElementsByClassName('results-header')).toHaveLength(0)
expect(container.textContent).toContain('Search HCA Data Portal?')
})
it('should not render message about HCA if already requested', () => {
const studySearchState = {
isError: false,
isLoaded: true,
params: { external: 'hca' },
results: {
studies: [],
facets: { cell_type: 'CL_0000548' }
}
}
const { container } = render(
<ResultsPanel studySearchState={studySearchState} />
)
expect(container.textContent).not.toContain('Search HCA Data Portal?')
})
})
Loading