diff --git a/app/javascript/components/search/controls/FiltersBoxSearchable.jsx b/app/javascript/components/search/controls/FiltersBoxSearchable.jsx
index 26c0f398e8..d746118762 100644
--- a/app/javascript/components/search/controls/FiltersBoxSearchable.jsx
+++ b/app/javascript/components/search/controls/FiltersBoxSearchable.jsx
@@ -1,13 +1,13 @@
import React, { useState } from 'react'
import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
-import { faExternalLinkAlt, faTimesCircle } from '@fortawesome/free-solid-svg-icons'
+import { faExternalLinkAlt, faSearch, faTimesCircle } from '@fortawesome/free-solid-svg-icons'
import pluralize from 'pluralize'
import _find from 'lodash/find'
import _remove from 'lodash/remove'
-import { fetchFacetFilters } from '~/lib/scp-api'
import FiltersBox from './FiltersBox'
-import FiltersSearchBar from './FiltersSearchBar'
+import FormControl from 'react-bootstrap/lib/FormControl'
+import Button from 'react-bootstrap/lib/Button'

/**
 * Component for filter search and filter lists
 */
@@ -15,6 +15,7 @@ import FiltersSearchBar from './FiltersSearchBar'
export default function FiltersBoxSearchable({ facet, selection, setSelection, show, setShow, hideControls }) {
// State that is specific to FiltersBox
const [matchingFilters, setMatchingFilters] = useState(facet.filters)
+ const [searchText, setSearchText] = useState('')
const [hasFilterSearchResults, setHasFilterSearchResults] = useState(false)
/*
@@ -42,14 +43,17 @@ export default function FiltersBoxSearchable({ facet, selection, setSelection, s
* For example, among the many filters in the "Disease" facet, search
* for filters matching the term "tuberculosis".
*/
- async function searchFilters(terms) {
- const apiData = await fetchFacetFilters(facet.id, terms)
- const matchingFilters = apiData.filters
- const hasResults = apiData.query !== '' && matchingFilters.length > 0
+ function searchFilters(terms) {
+ const lcTerms = terms.split(' ').map(text => text.toLowerCase())
+ const newFilters = facet.filters.filter(facetFilter => {
+ return lcTerms.some(lcTerm => {
+ return facetFilter.name.toLowerCase().includes(lcTerm)
+ })
+ })
+ const hasResults = newFilters.length > 0
setHasFilterSearchResults(hasResults)
-
- setMatchingFilters(matchingFilters)
+ setMatchingFilters(newFilters)
}
/**
@@ -98,10 +102,30 @@ export default function FiltersBoxSearchable({ facet, selection, setSelection, s
    show &&
      { showSearchBar && (
        <>
-          <FiltersSearchBar searchFilters={searchFilters} filtersBoxId={…}/>
+          <FormControl
+            type="text"
+            value={searchText}
+            onChange={e => {
+              setSearchText(e.target.value)
+              searchFilters(e.target.value)
+            }
+            }
+          />
+          <Button>
+            <FontAwesomeIcon
+              icon={faSearch}
+            />
+          </Button>
        </>
      )}
      { selectedFilterBadges }
diff --git a/app/javascript/components/search/controls/FiltersSearchBar.jsx b/app/javascript/components/search/controls/FiltersSearchBar.jsx
deleted file mode 100644
index 1401caffeb..0000000000
--- a/app/javascript/components/search/controls/FiltersSearchBar.jsx
+++ /dev/null
@@ -1,44 +0,0 @@
-import React, { useState } from 'react'
-import Form from 'react-bootstrap/lib/Form'
-import FormControl from 'react-bootstrap/lib/FormControl'
-import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
-import { faSearch } from '@fortawesome/free-solid-svg-icons'
-import Button from 'react-bootstrap/lib/Button'
-
-/**
- * Component to search filters within a given facet
- * Used when facet has many available filters (e.g. disease)
- */
-export default function FiltersSearchBar({ searchFilters, filtersBoxId }) {
- const [searchText, setSearchText] = useState('')
-
- /** perform a search for matching filters */
- async function handleFilterSearchSubmit(event) {
- event.preventDefault() // catch keyboard return and prevent form submit
- await searchFilters(searchText)
- }
-
-  return (
-    <Form onSubmit={handleFilterSearchSubmit}>
-      <FormControl value={searchText} onChange={e => setSearchText(e.target.value)}/>
-      <Button type="submit"><FontAwesomeIcon icon={faSearch}/></Button>
-    </Form>
-  )
-}
diff --git a/app/lib/differential_expression_service.rb b/app/lib/differential_expression_service.rb
index 3de7d03463..638ba46766 100644
--- a/app/lib/differential_expression_service.rb
+++ b/app/lib/differential_expression_service.rb
@@ -131,7 +131,7 @@ def self.run_differential_expression_job(cluster_group, study, user, annotation_
de_type: 'rest', group1: nil, group2: nil, machine_type: nil, dry_run: nil)
validate_study(study)
validate_annotation(cluster_group, study, annotation_name, annotation_scope, group1:, group2:)
- cluster_url = cluster_file_url(cluster_group)
+ cluster_url = RequestUtils.cluster_file_url(cluster_group)
study_file = cluster_group.study_file
metadata_url = study_file.is_viz_anndata? ?
RequestUtils.data_fragment_url(study_file, 'metadata') :
@@ -480,23 +480,6 @@ def self.encode_filename(values)
values.map { |val| val.gsub(/\+/, 'pos').gsub(/\W/, '_') }.join('--')
end
- # return a GS URL for a requested ClusterGroup, depending on file type
- #
- # * *params*
- # - +cluster_group+ (ClusterGroup) => Clustering object to source name/file from
- #
- # * *returns*
- # - (String)
- def self.cluster_file_url(cluster_group)
- study_file = cluster_group.study_file
- if study_file.is_viz_anndata?
- data_frag = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: cluster_group.name)
- RequestUtils.data_fragment_url(study_file, 'cluster', file_type_detail: data_frag[:obsm_key_name])
- else
- study_file.gs_url
- end
- end
-
# retrieve the weekly user quota value
#
# * *returns*
diff --git a/app/lib/dot_plot_service.rb b/app/lib/dot_plot_service.rb
index 39b26515cd..885100358c 100644
--- a/app/lib/dot_plot_service.rb
+++ b/app/lib/dot_plot_service.rb
@@ -1,27 +1,81 @@
-# frozen_string_literal: true
-
# service that handles preprocessing expression/annotation data to speed up dot plot rendering
class DotPlotService
- # main handler for launching ingest job to process expression data
+ # main handler for launching ingest job to process expression data into DotPlotGene objects
+ # only runs if the study is eligible: exactly one processed matrix, plus clustering data for visualization
#
# * *params*
# - +study+ (Study) => the study that owns the data
- # - +cluster_group+ (ClusterGroup) => the cluster to source cell names from
- # - +annotation_file+ (StudyFile) => the StudyFile containing annotation data
- # - +expression_file+ (StudyFile) => the StudyFile to source data from
+ # - +cluster_group+ (ClusterGroup) => the cluster to set associations for
+ # - +user+ (User) => the user that will run the job
#
# * *yields*
# - (IngestJob) => the job that will be run to process the data
- def self.run_preprocess_expression_job(study, cluster_group, annotation_file, expression_file)
- study_eligible?(study) # method stub, waiting for scp-ingest-pipeline implementation
+ def self.run_process_dot_plot_genes(study, cluster_group, user)
+ validate_study(study, cluster_group)
+ expression_file = study_processed_matrices(study)&.first
+ metadata_file = study.metadata_file
+ validate_source_data(expression_file, metadata_file)
+ params_object = create_params_object(cluster_group, expression_file, metadata_file)
+ if params_object.valid?
+ job = IngestJob.new(
+ study:, study_file: expression_file, user:, action: :ingest_dot_plot_genes, params_object:
+ )
+ job.delay.push_remote_and_launch_ingest
+ true
+ else
+ raise ArgumentError, "job parameters failed to validate: #{params_object.errors.full_messages.join(', ')}"
+ end
+ end
+
+ # create DotPlotGeneIngestParameters object based on the provided files
+ #
+ # * *params*
+ # - +cluster_group+ (ClusterGroup) => the cluster group to associate with
+ # - +expression_file+ (StudyFile) => the expression matrix file to process
+ # - +metadata_file+ (StudyFile) => the metadata file to source annotations
+ #
+ # * *returns*
+ # - (DotPlotGeneIngestParameters) => a parameters object with the necessary file paths and metadata
+ def self.create_params_object(cluster_group, expression_file, metadata_file)
+ params = {
+ cluster_group_id: cluster_group.id,
+ cluster_file: RequestUtils.cluster_file_url(cluster_group)
+ }
+ case expression_file.file_type
+ when 'Expression Matrix'
+ params[:matrix_file_type] = 'dense'
+ params[:matrix_file_path] = expression_file.gs_url
+ params[:cell_metadata_file] = metadata_file.gs_url
+ when 'MM Coordinate Matrix'
+ params[:matrix_file_type] = 'mtx'
+ genes_file = expression_file.bundled_files.detect { |f| f.file_type == '10X Genes File' }
+ barcodes_file = expression_file.bundled_files.detect { |f| f.file_type == '10X Barcodes File' }
+ params[:matrix_file_path] = expression_file.gs_url
+ params[:cell_metadata_file] = metadata_file.gs_url
+ params[:gene_file] = genes_file.gs_url
+ params[:barcode_file] = barcodes_file.gs_url
+ when 'AnnData'
+ params[:matrix_file_type] = 'mtx' # extracted expression for AnnData is in MTX format
+ params[:cell_metadata_file] = RequestUtils.data_fragment_url(metadata_file, 'metadata')
+ params[:matrix_file_path] = RequestUtils.data_fragment_url(
+ expression_file, 'matrix', file_type_detail: 'processed'
+ )
+ params[:gene_file] = RequestUtils.data_fragment_url(
+ expression_file, 'features', file_type_detail: 'processed'
+ )
+ params[:barcode_file] = RequestUtils.data_fragment_url(
+ expression_file, 'barcodes', file_type_detail: 'processed'
+ )
+ end
+ DotPlotGeneIngestParameters.new(**params)
end
# determine study eligibility - can only have one processed matrix and be able to visualize clusters
#
# * *params*
- # - +study+ (Study) the study that owns the data
+ # - +study+ (Study) => the study that owns the data
# * *returns*
- # - (Boolean) true if the study is eligible for dot plot visualization
+ # - (Boolean) => true if the study is eligible for dot plot visualization
def self.study_eligible?(study)
processed_matrices = study_processed_matrices(study)
study.can_visualize_clusters? && study.has_expression_data? && processed_matrices.size == 1
@@ -29,11 +83,11 @@ def self.study_eligible?(study)
# check if the given study/cluster has already been preprocessed
# * *params*
- # - +study+ (Study) the study that owns the data
- # - +cluster_group+ (ClusterGroup) the cluster to check for processed data
+ # - +study+ (Study) => the study that owns the data
+ # - +cluster_group+ (ClusterGroup) => the cluster to check for processed data
#
# * *returns*
- # - (Boolean) true if the study/cluster has already been processed
+ # - (Boolean) => true if the study/cluster has already been processed
def self.cluster_processed?(study, cluster_group)
DotPlotGene.where(study:, cluster_group:).exists?
end
@@ -41,67 +95,41 @@ def self.cluster_processed?(study, cluster_group)
# get processed expression matrices for a study
#
# * *params*
- # - +study+ (Study) the study to get matrices for
+ # - +study+ (Study) => the study to get matrices for
#
# * *returns*
- # - (Array) an array of processed expression matrices for the study
+ # - (Array) => an array of processed expression matrices for the study
def self.study_processed_matrices(study)
study.expression_matrices.select do |matrix|
matrix.is_viz_anndata? || !matrix.is_raw_counts_file?
end
end
- # seeding method for testing purposes, will be removed once pipeline is in place
- # data is random and not representative of actual expression data
- def self.seed_dot_plot_genes(study)
- return false unless study_eligible?(study)
-
- DotPlotGene.where(study_id: study.id).delete_all
- puts "Seeding dot plot genes for #{study.accession}"
- expression_matrix = study.expression_matrices.first
- print 'assembling genes and annotations...'
- genes = Gene.where(study:, study_file: expression_matrix).pluck(:name)
- annotations = AnnotationVizService.available_metadata_annotations(
- study, annotation_type: 'group'
- ).reject { |a| a[:scope] == 'invalid' }
- puts " done. Found #{genes.size} genes and #{annotations.size} study-wide annotations."
- study.cluster_groups.each do |cluster_group|
- next if cluster_processed?(study, cluster_group)
+ # validate the study for dot plot preprocessing
+ #
+ # * *params*
+ # - +study+ (Study) => the study to validate
+ # - +cluster_group+ (ClusterGroup) => the cluster to check for existing DotPlotGene results
+ #
+ # * *raises*
+ # - (ArgumentError) => if the study is invalid, ineligible for dot plot visualization, or already processed for this cluster
+ def self.validate_study(study, cluster_group)
+ raise ArgumentError, 'Invalid study' unless study.present? && study.is_a?(Study)
+ raise ArgumentError, 'Study does not qualify for dot plot visualization' unless study_eligible?(study)
+ raise ArgumentError, 'Cluster has already been processed' if cluster_processed?(study, cluster_group)
+ end
- cluster_annotations = ClusterVizService.available_annotations_by_cluster(
- cluster_group, 'group'
- ).reject { |a| a[:scope] == 'invalid' }
- all_annotations = annotations + cluster_annotations
- puts "Processing #{cluster_group.name} with #{all_annotations.size} annotations."
- documents = []
- genes.each do |gene|
- exp_scores = all_annotations.map do |annotation|
- {
- "#{annotation[:name]}--#{annotation[:type]}--#{annotation[:scope]}" => annotation[:values].map do |value|
- { value => [rand.round(3), rand.round(3)] }
- end.reduce({}, :merge)
- }
- end.reduce({}, :merge)
- documents << DotPlotGene.new(
- study:, study_file: expression_matrix, cluster_group:, gene_symbol: gene, searchable_gene: gene.downcase,
- exp_scores:
- ).attributes
- if documents.size == 1000
- DotPlotGene.collection.insert_many(documents)
- count = DotPlotGene.where(study_id: study.id, cluster_group_id: cluster_group.id).size
- puts "Inserted #{count}/#{genes.size} DotPlotGenes for #{cluster_group.name}."
- documents.clear
- end
- end
- DotPlotGene.collection.insert_many(documents)
- count = DotPlotGene.where(study_id: study.id, cluster_group_id: cluster_group.id).size
- puts "Inserted #{count}/#{genes.size} DotPlotGenes for #{cluster_group.name}."
- puts "Finished processing #{cluster_group.name}"
- end
- puts "Seeding complete for #{study.accession}, #{DotPlotGene.where(study_id: study.id).size} DotPlotGenes created."
- true
- rescue StandardError => e
- puts "Error seeding DotPlotGenes in #{study.accession}: #{e.message}"
- false
+ # validate required data is present for processing
+ #
+ # * *params*
+ # - +expression_file+ (StudyFile) => the expression matrix file to process
+ # - +metadata_file+ (StudyFile) => the metadata file to source annotations
+ #
+ # * *raises*
+ # - (ArgumentError) => if the source data is not fully parsed or the MTX bundle is not complete
+ def self.validate_source_data(expression_file, metadata_file)
+ raise ArgumentError, 'Missing required files' unless expression_file.present? && metadata_file.present?
+ raise ArgumentError, 'Source data not fully parsed' unless expression_file.parsed? && metadata_file.parsed?
+ raise ArgumentError, 'MTX bundle not completed' if expression_file.should_bundle? &&
+ !expression_file.has_completed_bundle?
end
end
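
A minimal console sketch of the new DotPlotService entry point, assuming a study that already passes study_eligible? and has fully parsed source files; the accession and lookup calls below are illustrative, not part of this diff.

    # illustrative invocation; raises ArgumentError if study, cluster, or source data validation fails
    study = Study.find_by(accession: 'SCP1234')   # hypothetical accession
    cluster_group = study.cluster_groups.first
    user = study.user
    # queues an IngestJob with action :ingest_dot_plot_genes via delayed_job, returning true on success
    DotPlotService.run_process_dot_plot_genes(study, cluster_group, user)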
diff --git a/app/lib/request_utils.rb b/app/lib/request_utils.rb
index a3f1cfcf80..c529278c08 100644
--- a/app/lib/request_utils.rb
+++ b/app/lib/request_utils.rb
@@ -154,6 +154,24 @@ def self.data_fragment_url(ann_data_file, fragment_type, gs_url: true, file_type
"#{url}.#{ext}.gz"
end
+ # return a GS URL for a requested ClusterGroup, depending on file type
+ #
+ # * *params*
+ # - +cluster_group+ (ClusterGroup) => Clustering object to source name/file from
+ #
+ # * *returns*
+ # - (String)
+ def self.cluster_file_url(cluster_group)
+ study_file = cluster_group.study_file
+ if study_file.is_viz_anndata?
+ data_frag = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: cluster_group.name)
+ safe_frag = data_frag.with_indifferent_access
+ data_fragment_url(study_file, 'cluster', file_type_detail: safe_frag[:obsm_key_name])
+ else
+ study_file.gs_url
+ end
+ end
+
# extracts an array of genes from a comma-delimited string list of gene names
def self.get_genes_from_param(study, gene_param)
terms = RequestUtils.sanitize_search_terms(gene_param).split(',')
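
For context, the two URL shapes the relocated helper returns, mirroring the expectations in the test added below; the bucket, accession, and variable names here are illustrative placeholders.

    # AnnData-backed cluster: points at the extracted cluster fragment
    RequestUtils.cluster_file_url(anndata_cluster_group)
    # => "gs://<bucket>/_scp_internal/anndata_ingest/<accession>_<file_id>/h5ad_frag.cluster.X_umap.tsv.gz"

    # classic clustering file: plain GS URL of the uploaded file
    RequestUtils.cluster_file_url(tsv_cluster_group)
    # => "gs://<bucket>/cluster.tsv.gz"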
diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb
index def5a7524b..c17f717d91 100644
--- a/app/models/ann_data_file_info.rb
+++ b/app/models/ann_data_file_info.rb
@@ -126,7 +126,14 @@ def merge_form_fragments(form_data, fragments)
# also supports finding values as both strings and symbols (for data_type values)
def find_fragment(**attrs)
data_fragments.detect do |fragment|
- !{ **attrs }.map { |k, v| fragment[k] == v || fragment[k] == v.send(transform_for(v)) }.include?(false)
+ !{ **attrs }.map do |k, v|
+ safe_v = v.send(transform_for(v))
+ safe_k = k.send(transform_for(k))
+ fragment[k] == v ||
+ fragment[k] == safe_v ||
+ fragment[safe_k] == v ||
+ fragment[safe_k] == safe_v
+ end.include?(false)
end
end
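
The extra comparisons in find_fragment matter because stored fragments may carry string keys and values while callers (such as RequestUtils.cluster_file_url) pass symbols. A loose standalone sketch of the matching rule with an illustrative fragment; the model's transform_for helper handles the actual string/symbol flip:

    # illustrative fragment as it might be stored (string keys and values)
    fragment = { 'data_type' => 'cluster', 'name' => 'umap', 'obsm_key_name' => 'X_umap' }
    attrs = { data_type: :cluster, name: 'umap' }
    # treat each key/value pair as matching if either its original or string form lines up
    attrs.all? do |k, v|
      [k, k.to_s].any? { |key| [v, v.to_s].include?(fragment[key]) }
    end
    # => true, even though the caller's symbol keys and :cluster value differ in type from storage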
diff --git a/app/models/batch_api_client.rb b/app/models/batch_api_client.rb
index 807b6aa9ef..4ef5eb8c32 100644
--- a/app/models/batch_api_client.rb
+++ b/app/models/batch_api_client.rb
@@ -21,7 +21,8 @@ class BatchApiClient
ingest_differential_expression: ['Differential Expression'],
render_expression_arrays: %w[Cluster],
image_pipeline: %w[Cluster],
- ingest_anndata: %w[AnnData]
+ ingest_anndata: %w[AnnData],
+ ingest_dot_plot_genes: ['Expression Matrix', 'MM Coordinate Matrix', 'AnnData']
}.freeze
# default GCE machine_type
diff --git a/app/models/dot_plot_gene_ingest_parameters.rb b/app/models/dot_plot_gene_ingest_parameters.rb
new file mode 100644
index 0000000000..ae52f5d7af
--- /dev/null
+++ b/app/models/dot_plot_gene_ingest_parameters.rb
@@ -0,0 +1,52 @@
+# class to hold parameters specific to ingest job for computing dot plot gene metrics
+class DotPlotGeneIngestParameters
+ include ActiveModel::Model
+ include Parameterizable
+
+ PARAMETER_NAME = '--ingest-dot-plot-genes'
+
+ # cell_metadata_file: metadata file to source annotations
+ # cluster_file: clustering file with cells to use as control list for filtering and optional annotations
+ # cluster_group_id: BSON ID of ClusterGroup object for associations
+ # matrix_file_path: expression matrix with source data
+ # matrix_file_type: type of expression matrix ('dense' or 'mtx')
+ # gene_file (optional): genes/features file for sparse matrix
+ # barcode_file (optional): barcodes file for sparse matrix
+ # machine_type (optional): override for default ingest machine type (defaults to 'n2d-highmem-8')
+ PARAM_DEFAULTS = {
+ cell_metadata_file: nil,
+ cluster_file: nil,
+ cluster_group_id: nil,
+ matrix_file_path: nil,
+ matrix_file_type: nil,
+ gene_file: nil,
+ barcode_file: nil,
+ machine_type: 'n2d-highmem-8'
+ }.freeze
+
+ # values that are available as methods but not as attributes (and not passed to command line)
+ NON_ATTRIBUTE_PARAMS = %i[machine_type].freeze
+
+ attr_accessor(*PARAM_DEFAULTS.keys)
+
+ validates :cell_metadata_file, :cluster_file, :cluster_group_id, :matrix_file_path, :matrix_file_type, presence: true
+ validates :cell_metadata_file, :cluster_file, :matrix_file_path,
+ format: { with: Parameterizable::GS_URL_REGEXP, message: 'is not a valid GS url' }
+ validates :matrix_file_type, inclusion: %w[dense mtx]
+ validates :machine_type, inclusion: Parameterizable::GCE_MACHINE_TYPES
+ validates :gene_file, :barcode_file,
+ presence: true,
+ format: {
+ with: Parameterizable::GS_URL_REGEXP,
+ message: 'is not a valid GS url'
+ },
+ if: -> { matrix_file_type == 'mtx' }
+
+ def initialize(attributes = nil)
+ super
+ end
+
+ def cluster_group
+ ClusterGroup.find(cluster_group_id)
+ end
+end
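
A short sketch of the conditional validation above, using illustrative bucket paths: gene and barcode files are only required when matrix_file_type is 'mtx'.

    # dense matrices validate without gene/barcode files (paths are illustrative)
    dense = DotPlotGeneIngestParameters.new(
      cell_metadata_file: 'gs://bucket/metadata.tsv',
      cluster_file: 'gs://bucket/cluster.tsv',
      cluster_group_id: BSON::ObjectId.new,
      matrix_file_path: 'gs://bucket/dense.tsv',
      matrix_file_type: 'dense'
    )
    dense.valid? # => true

    # the same options with matrix_file_type: 'mtx' fail until gene_file/barcode_file are set
    sparse = DotPlotGeneIngestParameters.new(
      cell_metadata_file: 'gs://bucket/metadata.tsv',
      cluster_file: 'gs://bucket/cluster.tsv',
      cluster_group_id: BSON::ObjectId.new,
      matrix_file_path: 'gs://bucket/sparse.mtx',
      matrix_file_type: 'mtx'
    )
    sparse.valid? # => false, gene_file and barcode_file must be present GS URLs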
diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb
index bc017da5f4..e7c042737d 100644
--- a/app/models/ingest_job.rb
+++ b/app/models/ingest_job.rb
@@ -15,7 +15,7 @@ class IngestJob
# valid ingest actions to perform
VALID_ACTIONS = %i[
ingest_expression ingest_cluster ingest_cell_metadata ingest_anndata ingest_differential_expression ingest_subsample
- differential_expression render_expression_arrays
+ ingest_dot_plot_genes differential_expression render_expression_arrays
].freeze
# Mappings between actions & models (for cleaning up data on re-parses)
@@ -24,7 +24,8 @@ class IngestJob
ingest_cluster: ClusterGroup,
ingest_cell_metadata: CellMetadatum,
ingest_differential_expression: DifferentialExpressionResult,
- ingest_subsample: ClusterGroup
+ ingest_subsample: ClusterGroup,
+ ingest_dot_plot_genes: DotPlotGene
}.freeze
# non-standard job actions where data is not being read from a file to insert into MongoDB
@@ -37,7 +38,7 @@ class IngestJob
# jobs that need parameters objects in order to launch correctly
PARAMS_OBJ_REQUIRED = %i[
- differential_expression render_expression_arrays image_pipeline ingest_anndata
+ differential_expression render_expression_arrays image_pipeline ingest_anndata ingest_dot_plot_genes
].freeze
# Name of pipeline submission running in GCP (from [BatchApiClient#run_job])
@@ -393,8 +394,11 @@ def poll_for_completion(run_at: 1.minute.from_now)
set_study_state_after_ingest
study_file.invalidate_cache_by_file_type # clear visualization caches for file
log_to_mixpanel
- if action == :differential_expression
+ case action.to_sym
+ when :differential_expression
subject = "Differential expression analysis for #{study_file.file_type} file: '#{study_file.upload_file_name}' has completed processing"
+ when :ingest_dot_plot_genes
+ subject = "Dot plot gene metrics for #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has completed"
else
subject = "#{study_file.file_type} file: '#{study_file.upload_file_name}' has completed parsing"
end
@@ -415,8 +419,11 @@ def poll_for_completion(run_at: 1.minute.from_now)
log_error_messages
log_to_mixpanel # log before queuing file for deletion to preserve properties
# don't delete files or notify users if this is a 'special action', like DE or image pipeline jobs
- if action == :differential_expression
+ case action.to_sym
+ when :differential_expression
subject = "Error: Differential expression analysis for #{study_file.file_type} file: '#{study_file.upload_file_name}' has failed processing"
+ when :ingest_dot_plot_genes
+ subject = "Error: Dot plot gene metrics for #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
else
subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed"
end
@@ -450,12 +457,16 @@ def poll_for_completion(run_at: 1.minute.from_now)
# in case of subsampling, only subsampled data cleanup is run and all other data is left in place
# this reduces churn for study owners as full-resolution data is still valid
def handle_ingest_failure(email_subject)
- if action.to_sym == :ingest_subsample
+ case action.to_sym
+ when :ingest_subsample
study_file.update(parse_status: 'parsed') # reset parse flag
cluster_name = cluster_name_by_file_type
cluster = ClusterGroup.find_by(name: cluster_name, study:, study_file:)
cluster.find_subsampled_data_arrays&.delete_all
cluster.update(subsampled: false, is_subsampling: false)
+ when :ingest_dot_plot_genes
+ cluster = params_object.cluster_group
+ DotPlotGene.where(study_id: study.id, cluster_group_id: cluster.id, study_file_id: study_file.id).delete_all
else
create_study_file_copy
study_file.update(parse_status: 'failed')
@@ -1144,6 +1155,7 @@ def extracted_raw_counts?(job)
def skip_anndata_summary?
study_file.has_anndata_summary? ||
action == :differential_expression ||
+ action == :ingest_dot_plot_genes ||
should_retry? ||
(!failed? && action == :ingest_anndata)
end
@@ -1254,6 +1266,13 @@ def generate_success_email_array
complete_pipeline_runtime = TimeDifference.between(*get_image_pipeline_timestamps).humanize
message << "Image Pipeline image rendering completed for \"#{params_object.cluster}\""
message << "Complete runtime (data cache & image rendering): #{complete_pipeline_runtime}"
+ when :ingest_dot_plot_genes
+ cluster_group = params_object.cluster_group
+ genes = DotPlotGene.where(study:, study_file:, cluster_group:).count
+ message << "Dot plot gene preprocessing completed for cluster: #{cluster_group.name}"
+ message << "Total genes created: #{genes}"
+ else
+ message << "Ingest job completed for #{study_file.upload_file_name}"
end
message
end
diff --git a/app/models/search_facet.rb b/app/models/search_facet.rb
index 9f84e83e1d..9aa7b1c1a6 100644
--- a/app/models/search_facet.rb
+++ b/app/models/search_facet.rb
@@ -405,7 +405,8 @@ def get_unique_filter_values(public_only: false)
end
return filter_map if is_numeric?
- filter_map.uniq { |filter| [filter[:id]&.downcase, filter[:name]&.downcase] }.reject do |filter|
+ # some filters will have the same ontology id but different labels, so take the first to prevent duplicates
+ filter_map.uniq { |filter| [filter[:id]&.downcase] }.reject do |filter|
filter[:id].blank? || filter[:name].blank?
end
end
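
The dedup change keeps a single filter per ontology ID even when labels differ only in wording or casing; a small sketch with made-up filter entries:

    # illustrative filter_map entries sharing one ontology id with different labels
    filter_map = [
      { id: 'MONDO_0018076', name: 'tuberculosis' },
      { id: 'MONDO_0018076', name: 'Tuberculosis' },
      { id: '', name: 'unlabeled entry' }
    ]
    filter_map.uniq { |filter| [filter[:id]&.downcase] }
              .reject { |filter| filter[:id].blank? || filter[:name].blank? }
    # => [{ id: 'MONDO_0018076', name: 'tuberculosis' }]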
diff --git a/app/models/study.rb b/app/models/study.rb
index d6b690948f..b54e7ea43c 100644
--- a/app/models/study.rb
+++ b/app/models/study.rb
@@ -143,6 +143,8 @@ def by_name(name)
end
end
+ has_many :dot_plot_genes, dependent: :delete_all
+
has_many :study_shares, dependent: :destroy do
def can_edit
where(permission: 'Edit').map(&:email)
diff --git a/config/application.rb b/config/application.rb
index 087e61af9b..cd3478775c 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -29,7 +29,7 @@ class Application < Rails::Application
config.middleware.use Rack::Brotli
# Docker image for file parsing via scp-ingest-pipeline
- config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.42.0'
+ config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.43.0'
# Docker image for image pipeline jobs
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
diff --git a/test/lib/request_utils_test.rb b/test/lib/request_utils_test.rb
index 4cce0ccd2c..5ad78457ec 100644
--- a/test/lib/request_utils_test.rb
+++ b/test/lib/request_utils_test.rb
@@ -195,6 +195,31 @@ class RequestUtilsTest < ActionDispatch::IntegrationTest
end
end
+ test 'should construct cluster file url' do
+ anndata_file = FactoryBot.create(:ann_data_file,
+ name: 'anndata.h5ad',
+ study: @private_study,
+ cell_input: %w[A B C D],
+ coordinate_input: [
+ { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+ ])
+ cluster_group_umap = ClusterGroup.find_by(study_file: anndata_file, name: 'umap')
+ expected_url = "gs://#{@private_study.bucket_id}/_scp_internal/anndata_ingest/" \
+ "#{@private_study.accession}_#{anndata_file.id}/h5ad_frag.cluster.X_umap.tsv.gz"
+ assert_equal expected_url, RequestUtils.cluster_file_url(cluster_group_umap)
+ cluster_file = FactoryBot.create(:cluster_file,
+ name: 'cluster.tsv.gz',
+ study: @private_study,
+ cell_input: {
+ x: [1, 2, 3],
+ y: [1, 2, 3],
+ cells: %w[A B C]
+ })
+ expected_cluster_url = "gs://#{@private_study.bucket_id}/cluster.tsv.gz"
+ cluster_group = ClusterGroup.find_by(study_file: cluster_file, name: 'cluster.tsv.gz')
+ assert_equal expected_cluster_url, RequestUtils.cluster_file_url(cluster_group)
+ end
+
test 'should properly format incorrect study url' do
identifier = "#{@public_study.accession}/#{@public_study.url_safe_name}"
base_path = "/single_cell/study/#{identifier}"
diff --git a/test/models/dot_plot_gene_ingest_parameters_test.rb b/test/models/dot_plot_gene_ingest_parameters_test.rb
new file mode 100644
index 0000000000..470ec8eceb
--- /dev/null
+++ b/test/models/dot_plot_gene_ingest_parameters_test.rb
@@ -0,0 +1,66 @@
+require 'test_helper'
+
+class DotPlotGeneIngestParametersTest < ActiveSupport::TestCase
+ before(:all) do
+ cluster_group_id = BSON::ObjectId.new
+ @dense_options = {
+ cell_metadata_file: 'gs://test_bucket/metadata.tsv',
+ cluster_file: 'gs://test_bucket/cluster.tsv',
+ cluster_group_id:,
+ matrix_file_path: 'gs://test_bucket/dense.tsv',
+ matrix_file_type: 'dense',
+ }
+
+ @sparse_options = {
+ cell_metadata_file: 'gs://test_bucket/metadata.tsv',
+ cluster_file: 'gs://test_bucket/cluster.tsv',
+ cluster_group_id:,
+ matrix_file_path: 'gs://test_bucket/sparse.tsv',
+ matrix_file_type: 'mtx',
+ gene_file: 'gs://test_bucket/genes.tsv',
+ barcode_file: 'gs://test_bucket/barcodes.tsv'
+ }
+
+ @anndata_options = {
+ cell_metadata_file: 'gs://test_bucket/metadata.tsv',
+ cluster_file: 'gs://test_bucket/cluster.tsv',
+ cluster_group_id:,
+ matrix_file_path: 'gs://test_bucket/matrix.h5ad',
+ matrix_file_type: 'mtx',
+ gene_file: 'gs://test_bucket/genes.tsv',
+ barcode_file: 'gs://test_bucket/barcodes.tsv'
+ }
+ end
+
+ test 'should create and validate parameters' do
+ [@dense_options, @sparse_options, @anndata_options].each do |options|
+ params = DotPlotGeneIngestParameters.new(**options)
+ assert params.valid?
+ assert_equal DotPlotGeneIngestParameters::PARAM_DEFAULTS[:machine_type], params.machine_type
+ if options[:matrix_file_type] == 'mtx'
+ assert params.gene_file.present?
+ assert params.barcode_file.present?
+ else
+ assert params.gene_file.nil?
+ assert params.barcode_file.nil?
+ end
+ end
+ end
+
+ test 'should find associated cluster group' do
+ user = FactoryBot.create(:user, test_array: @@users_to_clean)
+ study = FactoryBot.create(:detached_study,
+ name_prefix: 'DotPlotGeneIngestParameters Test',
+ user:,
+ test_array: @@studies_to_clean)
+ FactoryBot.create(:cluster_file,
+ name: 'cluster.txt',
+ study:,
+ cell_input: { x: [1, 4, 6], y: [7, 5, 3], cells: %w[A B C] }
+ )
+ cluster_group = ClusterGroup.find_by(study:)
+ new_options = @dense_options.dup.merge(cluster_group_id: cluster_group.id)
+ params = DotPlotGeneIngestParameters.new(**new_options)
+ assert_equal cluster_group, params.cluster_group
+ end
+end
diff --git a/test/services/dot_plot_service_test.rb b/test/services/dot_plot_service_test.rb
index cbcf05ccf4..7846cff709 100644
--- a/test/services/dot_plot_service_test.rb
+++ b/test/services/dot_plot_service_test.rb
@@ -29,6 +29,10 @@ class DotPlotServiceTest < ActiveSupport::TestCase
@cluster_group = @study.cluster_groups.first
end
+ teardown do
+ DotPlotGene.delete_all
+ end
+
test 'should determine study eligibility for preprocessing' do
assert DotPlotService.study_eligible?(@study)
empty_study = FactoryBot.create(:detached_study,
@@ -59,7 +63,86 @@ class DotPlotServiceTest < ActiveSupport::TestCase
assert_empty DotPlotService.study_processed_matrices(empty_study)
end
+ test 'should validate study for dot plot preprocessing' do
+ DotPlotService.validate_study(@study, @cluster_group) # should not raise error
+ empty_study = FactoryBot.create(:detached_study,
+ name_prefix: 'Empty DotPlot',
+ user: @user,
+ test_array: @@studies_to_clean)
+ assert_raise ArgumentError do
+ DotPlotService.validate_study(empty_study, ClusterGroup.new)
+ end
+ end
+
+ test 'should get dense parameters for dot plot gene ingest job' do
+ params = DotPlotService.create_params_object(@cluster_group, @expression_file, @metadata_file)
+ assert_equal params.matrix_file_type, 'dense'
+ assert_equal params.cell_metadata_file, @metadata_file.gs_url
+ assert_equal params.cluster_file, @cluster_file.gs_url
+ assert_equal params.matrix_file_path, @expression_file.gs_url
+ end
+
+ test 'should get sparse parameters for dot plot gene ingest job' do
+ study = FactoryBot.create(:detached_study,
+ name_prefix: 'Sparse DotPlotService Test Study',
+ user: @user,
+ test_array: @@studies_to_clean)
+ cluster_file = FactoryBot.create(:cluster_file,
+ name: 'cluster_example_sparse.txt',
+ study:,
+ cell_input: { x: [1, 2, 3], y: [4, 5, 6] })
+ metadata_file = FactoryBot.create(:metadata_file, name: 'metadata.txt', study:)
+ matrix = FactoryBot.create(:expression_file,
+ name: 'matrix.mtx',
+ file_type: 'MM Coordinate Matrix',
+ study:)
+ gene_file = FactoryBot.create(:study_file, name: 'genes.tsv', file_type: '10X Genes File', study:)
+ barcode_file = FactoryBot.create(:study_file, name: 'barcodes.tsv', file_type: '10X Barcodes File', study:)
+ bundle = StudyFileBundle.new(study:, bundle_type: matrix.file_type)
+ bundle.add_files(matrix, gene_file, barcode_file)
+ bundle.save!
+ cluster_group = study.cluster_groups.first
+ params = DotPlotService.create_params_object(cluster_group, matrix, metadata_file)
+ assert_equal params.matrix_file_type, 'mtx'
+ assert_equal params.cell_metadata_file, metadata_file.gs_url
+ assert_equal params.cluster_file, cluster_file.gs_url
+ assert_equal params.matrix_file_path, matrix.gs_url
+ end
+
+ test 'should get anndata parameters for dot plot gene ingest job' do
+ study = FactoryBot.create(:detached_study,
+ name_prefix: 'AnnData DotPlotService Test Study',
+ user: @user,
+ test_array: @@studies_to_clean)
+ anndata_file = FactoryBot.create(:ann_data_file,
+ name: 'matrix.h5ad',
+ study:,
+ cell_input: %w[A B C D],
+ coordinate_input: [
+ { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
+ ])
+ cluster_group = study.cluster_groups.first
+ params = DotPlotService.create_params_object(cluster_group, anndata_file, anndata_file)
+ assert_equal 'mtx', params.matrix_file_type
+ assert_equal RequestUtils.data_fragment_url(anndata_file, 'metadata'),
+ params.cell_metadata_file
+ assert_equal RequestUtils.data_fragment_url(anndata_file, 'cluster', file_type_detail: 'X_umap'),
+ params.cluster_file
+ assert_equal RequestUtils.data_fragment_url(anndata_file, 'matrix', file_type_detail: 'processed'),
+ params.matrix_file_path
+ assert_equal RequestUtils.data_fragment_url(anndata_file, 'features', file_type_detail: 'processed'),
+ params.gene_file
+ assert_equal RequestUtils.data_fragment_url(anndata_file, 'barcodes', file_type_detail: 'processed'),
+ params.barcode_file
+ end
+
test 'should run preprocess expression job' do
- assert DotPlotService.run_preprocess_expression_job(@study, @cluster_group, @metadata_file, @expression_file)
+ job_mock = Minitest::Mock.new
+ job_mock.expect(:push_remote_and_launch_ingest, Delayed::Job.new)
+ mock = Minitest::Mock.new
+ mock.expect(:delay, job_mock)
+ IngestJob.stub :new, mock do
+ assert DotPlotService.run_process_dot_plot_genes(@study, @cluster_group, @user)
+ end
end
end