diff --git a/app/javascript/components/search/controls/FiltersBoxSearchable.jsx b/app/javascript/components/search/controls/FiltersBoxSearchable.jsx index 26c0f398e8..d746118762 100644 --- a/app/javascript/components/search/controls/FiltersBoxSearchable.jsx +++ b/app/javascript/components/search/controls/FiltersBoxSearchable.jsx @@ -1,13 +1,13 @@ import React, { useState } from 'react' import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' -import { faExternalLinkAlt, faTimesCircle } from '@fortawesome/free-solid-svg-icons' +import { faExternalLinkAlt, faSearch, faTimesCircle } from '@fortawesome/free-solid-svg-icons' import pluralize from 'pluralize' import _find from 'lodash/find' import _remove from 'lodash/remove' -import { fetchFacetFilters } from '~/lib/scp-api' import FiltersBox from './FiltersBox' -import FiltersSearchBar from './FiltersSearchBar' +import FormControl from 'react-bootstrap/lib/FormControl' +import Button from 'react-bootstrap/lib/Button' /** * Component for filter search and filter lists @@ -15,6 +15,7 @@ import FiltersSearchBar from './FiltersSearchBar' export default function FiltersBoxSearchable({ facet, selection, setSelection, show, setShow, hideControls }) { // State that is specific to FiltersBox const [matchingFilters, setMatchingFilters] = useState(facet.filters) + const [searchText, setSearchText] = useState('') const [hasFilterSearchResults, setHasFilterSearchResults] = useState(false) /* @@ -42,14 +43,17 @@ export default function FiltersBoxSearchable({ facet, selection, setSelection, s * For example, among the many filters in the "Disease" facet, search * for filters matching the term "tuberculosis". 
 */ - async function searchFilters(terms) { - const apiData = await fetchFacetFilters(facet.id, terms) - const matchingFilters = apiData.filters - const hasResults = apiData.query !== '' && matchingFilters.length > 0 + function searchFilters(terms) { + const lcTerms = terms.trim().split(/\s+/).map(text => text.toLowerCase()) + const newFilters = facet.filters.filter(facetFilter => { + return lcTerms.some(lcTerm => { + return facetFilter.name.toLowerCase().includes(lcTerm) + }) + }) + const hasResults = terms.trim() !== '' && newFilters.length > 0 setHasFilterSearchResults(hasResults) - - setMatchingFilters(matchingFilters) + setMatchingFilters(newFilters) } /** @@ -98,10 +102,30 @@ export default function FiltersBoxSearchable({ facet, selection, setSelection, s show &&
{ showSearchBar && ( <> - +
+
+ { + setSearchText(e.target.value) + searchFilters(e.target.value) + } + } + /> +
+ +
+
+
{ selectedFilterBadges }
diff --git a/app/javascript/components/search/controls/FiltersSearchBar.jsx b/app/javascript/components/search/controls/FiltersSearchBar.jsx deleted file mode 100644 index 1401caffeb..0000000000 --- a/app/javascript/components/search/controls/FiltersSearchBar.jsx +++ /dev/null @@ -1,44 +0,0 @@ -import React, { useState } from 'react' -import Form from 'react-bootstrap/lib/Form' -import FormControl from 'react-bootstrap/lib/FormControl' -import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' -import { faSearch } from '@fortawesome/free-solid-svg-icons' -import Button from 'react-bootstrap/lib/Button' - -/** - * Component to search filters within a given facet - * Used when facet has many available filters (e.g. disease) - */ -export default function FiltersSearchBar({ searchFilters, filtersBoxId }) { - const [searchText, setSearchText] = useState('') - - /** perform a search for matching filters */ - async function handleFilterSearchSubmit(event) { - event.preventDefault() // catch keyboard return and prevent form submit - await searchFilters(searchText) - } - - return ( -
-
- setSearchText(e.target.value)} - /> -
- -
- -
- ) -} diff --git a/app/lib/differential_expression_service.rb b/app/lib/differential_expression_service.rb index 3de7d03463..638ba46766 100644 --- a/app/lib/differential_expression_service.rb +++ b/app/lib/differential_expression_service.rb @@ -131,7 +131,7 @@ def self.run_differential_expression_job(cluster_group, study, user, annotation_ de_type: 'rest', group1: nil, group2: nil, machine_type: nil, dry_run: nil) validate_study(study) validate_annotation(cluster_group, study, annotation_name, annotation_scope, group1:, group2:) - cluster_url = cluster_file_url(cluster_group) + cluster_url = RequestUtils.cluster_file_url(cluster_group) study_file = cluster_group.study_file metadata_url = study_file.is_viz_anndata? ? RequestUtils.data_fragment_url(study_file, 'metadata') : @@ -480,23 +480,6 @@ def self.encode_filename(values) values.map { |val| val.gsub(/\+/, 'pos').gsub(/\W/, '_') }.join('--') end - # return a GS URL for a requested ClusterGroup, depending on file type - # - # * *params* - # - +cluster_group+ (ClusterGroup) => Clustering object to source name/file from - # - # * *returns* - # - (String) - def self.cluster_file_url(cluster_group) - study_file = cluster_group.study_file - if study_file.is_viz_anndata? 
- data_frag = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: cluster_group.name) - RequestUtils.data_fragment_url(study_file, 'cluster', file_type_detail: data_frag[:obsm_key_name]) - else - study_file.gs_url - end - end - # retrieve the weekly user quota value # # * *returns* diff --git a/app/lib/dot_plot_service.rb b/app/lib/dot_plot_service.rb index 39b26515cd..885100358c 100644 --- a/app/lib/dot_plot_service.rb +++ b/app/lib/dot_plot_service.rb @@ -1,27 +1,81 @@ -# frozen_string_literal: true - # service that handles preprocessing expression/annotation data to speed up dot plot rendering class DotPlotService - # main handler for launching ingest job to process expression data + # main handler for launching ingest job to process expression data into DotPlotGene objects + # since the study can have only one processed matrix/metadata file, this will only run if the study is eligible # # * *params* # - +study+ (Study) => the study that owns the data - # - +cluster_group+ (ClusterGroup) => the cluster to source cell names from - # - +annotation_file+ (StudyFile) => the StudyFile containing annotation data - # - +expression_file+ (StudyFile) => the StudyFile to source data from + # - +cluster_group+ (ClusterGroup) => the cluster to set associations for + # - +user+ (User) => the user that will run the job # # * *yields* # - (IngestJob) => the job that will be run to process the data - def self.run_preprocess_expression_job(study, cluster_group, annotation_file, expression_file) - study_eligible?(study) # method stub, waiting for scp-ingest-pipeline implementation + def self.run_process_dot_plot_genes(study, cluster_group, user) + validate_study(study, cluster_group) + expression_file = study_processed_matrices(study)&.first + metadata_file = study.metadata_file + validate_source_data(expression_file, metadata_file) + params_object = create_params_object(cluster_group, expression_file, metadata_file) + if params_object.valid? 
+ job = IngestJob.new( + study:, study_file: expression_file, user:, action: :ingest_dot_plot_genes, params_object: + ) + job.delay.push_remote_and_launch_ingest + true + else + raise ArgumentError, "job parameters failed to validate: #{params_object.errors.full_messages.join(', ')}" + end + end + + # create DotPlotGeneIngestParameters object based on the provided files + # + # * *params* + # - +cluster_group+ (ClusterGroup) => the cluster group to associate with + # - +expression_file+ (StudyFile) => the expression matrix file to process + # - +metadata_file+ (StudyFile) => the metadata file to source annotations + # + # * *returns* + # - (DotPlotGeneIngestParameters) => a parameters object with the necessary file paths and metadata + def self.create_params_object(cluster_group, expression_file, metadata_file) + params = { + cluster_group_id: cluster_group.id, + cluster_file: RequestUtils.cluster_file_url(cluster_group) + } + case expression_file.file_type + when 'Expression Matrix' + params[:matrix_file_type] = 'dense' + params[:matrix_file_path] = expression_file.gs_url + params[:cell_metadata_file] = metadata_file.gs_url + when 'MM Coordinate Matrix' + params[:matrix_file_type] = 'mtx' + genes_file = expression_file.bundled_files.detect { |f| f.file_type == '10X Genes File' } + barcodes_file = expression_file.bundled_files.detect { |f| f.file_type == '10X Barcodes File' } + params[:matrix_file_path] = expression_file.gs_url + params[:cell_metadata_file] = metadata_file.gs_url + params[:gene_file] = genes_file.gs_url + params[:barcode_file] = barcodes_file.gs_url + when 'AnnData' + params[:matrix_file_type] = 'mtx' # extracted expression for AnnData is in MTX format + params[:cell_metadata_file] = RequestUtils.data_fragment_url(metadata_file, 'metadata') + params[:matrix_file_path] = RequestUtils.data_fragment_url( + expression_file, 'matrix', file_type_detail: 'processed' + ) + params[:gene_file] = RequestUtils.data_fragment_url( + expression_file, 'features', 
file_type_detail: 'processed' + ) + params[:barcode_file] = RequestUtils.data_fragment_url( + expression_file, 'barcodes', file_type_detail: 'processed' + ) + end + DotPlotGeneIngestParameters.new(**params) end # determine study eligibility - can only have one processed matrix and be able to visualize clusters # # * *params* - # - +study+ (Study) the study that owns the data + # - +study+ (Study) => the study that owns the data # * *returns* - # - (Boolean) true if the study is eligible for dot plot visualization + # - (Boolean) => true if the study is eligible for dot plot visualization def self.study_eligible?(study) processed_matrices = study_processed_matrices(study) study.can_visualize_clusters? && study.has_expression_data? && processed_matrices.size == 1 @@ -29,11 +83,11 @@ def self.study_eligible?(study) # check if the given study/cluster has already been preprocessed # * *params* - # - +study+ (Study) the study that owns the data - # - +cluster_group+ (ClusterGroup) the cluster to check for processed data + # - +study+ (Study) => the study that owns the data + # - +cluster_group+ (ClusterGroup) => the cluster to check for processed data # # * *returns* - # - (Boolean) true if the study/cluster has already been processed + # - (Boolean) => true if the study/cluster has already been processed def self.cluster_processed?(study, cluster_group) DotPlotGene.where(study:, cluster_group:).exists? end @@ -41,67 +95,41 @@ def self.cluster_processed?(study, cluster_group) # get processed expression matrices for a study # # * *params* - # - +study+ (Study) the study to get matrices for + # - +study+ (Study) => the study to get matrices for # # * *returns* - # - (Array) an array of processed expression matrices for the study + # - (Array) => an array of processed expression matrices for the study def self.study_processed_matrices(study) study.expression_matrices.select do |matrix| matrix.is_viz_anndata? || !matrix.is_raw_counts_file? 
end end - # seeding method for testing purposes, will be removed once pipeline is in place - # data is random and not representative of actual expression data - def self.seed_dot_plot_genes(study) - return false unless study_eligible?(study) - - DotPlotGene.where(study_id: study.id).delete_all - puts "Seeding dot plot genes for #{study.accession}" - expression_matrix = study.expression_matrices.first - print 'assembling genes and annotations...' - genes = Gene.where(study:, study_file: expression_matrix).pluck(:name) - annotations = AnnotationVizService.available_metadata_annotations( - study, annotation_type: 'group' - ).reject { |a| a[:scope] == 'invalid' } - puts " done. Found #{genes.size} genes and #{annotations.size} study-wide annotations." - study.cluster_groups.each do |cluster_group| - next if cluster_processed?(study, cluster_group) + # validate the study for dot plot preprocessing + # + # * *params* + # - +study+ (Study) => the study to validate + # + # * *raises* + # - (ArgumentError) => if the study is invalid or does not qualify for dot plot visualization + def self.validate_study(study, cluster_group) + raise ArgumentError, 'Invalid study' unless study.present? && study.is_a?(Study) + raise ArgumentError, 'Study does not qualify for dot plot visualization' unless study_eligible?(study) + raise ArgumentError, 'Study has already been processed' if cluster_processed?(study, cluster_group) + end - cluster_annotations = ClusterVizService.available_annotations_by_cluster( - cluster_group, 'group' - ).reject { |a| a[:scope] == 'invalid' } - all_annotations = annotations + cluster_annotations - puts "Processing #{cluster_group.name} with #{all_annotations.size} annotations." 
- documents = [] - genes.each do |gene| - exp_scores = all_annotations.map do |annotation| - { - "#{annotation[:name]}--#{annotation[:type]}--#{annotation[:scope]}" => annotation[:values].map do |value| - { value => [rand.round(3), rand.round(3)] } - end.reduce({}, :merge) - } - end.reduce({}, :merge) - documents << DotPlotGene.new( - study:, study_file: expression_matrix, cluster_group:, gene_symbol: gene, searchable_gene: gene.downcase, - exp_scores: - ).attributes - if documents.size == 1000 - DotPlotGene.collection.insert_many(documents) - count = DotPlotGene.where(study_id: study.id, cluster_group_id: cluster_group.id).size - puts "Inserted #{count}/#{genes.size} DotPlotGenes for #{cluster_group.name}." - documents.clear - end - end - DotPlotGene.collection.insert_many(documents) - count = DotPlotGene.where(study_id: study.id, cluster_group_id: cluster_group.id).size - puts "Inserted #{count}/#{genes.size} DotPlotGenes for #{cluster_group.name}." - puts "Finished processing #{cluster_group.name}" - end - puts "Seeding complete for #{study.accession}, #{DotPlotGene.where(study_id: study.id).size} DotPlotGenes created." - true - rescue StandardError => e - puts "Error seeding DotPlotGenes in #{study.accession}: #{e.message}" - false + # validate required data is present for processing + # + # * *params* + # - +expression_file+ (StudyFile) => the expression matrix file to process + # - +metadata_file+ (StudyFile) => the metadata file to source annotations + # + # * *raises* + # - (ArgumentError) => if the source data is not fully parsed or MTX bundled is not completed + def self.validate_source_data(expression_file, metadata_file) + raise ArgumentError, 'Missing required files' unless expression_file.present? && metadata_file.present? + raise ArgumentError, 'Source data not fully parsed' unless expression_file.parsed? && metadata_file.parsed? + raise ArgumentError, 'MTX bundled not completed' if expression_file.should_bundle? 
&& + !expression_file.has_completed_bundle? end end diff --git a/app/lib/request_utils.rb b/app/lib/request_utils.rb index a3f1cfcf80..c529278c08 100644 --- a/app/lib/request_utils.rb +++ b/app/lib/request_utils.rb @@ -154,6 +154,24 @@ def self.data_fragment_url(ann_data_file, fragment_type, gs_url: true, file_type "#{url}.#{ext}.gz" end + # return a GS URL for a requested ClusterGroup, depending on file type + # + # * *params* + # - +cluster_group+ (ClusterGroup) => Clustering object to source name/file from + # + # * *returns* + # - (String) + def self.cluster_file_url(cluster_group) + study_file = cluster_group.study_file + if study_file.is_viz_anndata? + data_frag = study_file.ann_data_file_info.find_fragment(data_type: :cluster, name: cluster_group.name) + safe_frag = data_frag.with_indifferent_access + data_fragment_url(study_file, 'cluster', file_type_detail: safe_frag[:obsm_key_name]) + else + study_file.gs_url + end + end + # extracts an array of genes from a comma-delimited string list of gene names def self.get_genes_from_param(study, gene_param) terms = RequestUtils.sanitize_search_terms(gene_param).split(',') diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index def5a7524b..c17f717d91 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -126,7 +126,14 @@ def merge_form_fragments(form_data, fragments) # also supports finding values as both strings and symbols (for data_type values) def find_fragment(**attrs) data_fragments.detect do |fragment| - !{ **attrs }.map { |k, v| fragment[k] == v || fragment[k] == v.send(transform_for(v)) }.include?(false) + !{ **attrs }.map do |k, v| + safe_v = v.send(transform_for(v)) + safe_k = k.send(transform_for(k)) + fragment[k] == v || + fragment[k] == safe_v || + fragment[safe_k] == v || + fragment[safe_k] == safe_v + end.include?(false) end end diff --git a/app/models/batch_api_client.rb b/app/models/batch_api_client.rb index 807b6aa9ef..4ef5eb8c32 
100644 --- a/app/models/batch_api_client.rb +++ b/app/models/batch_api_client.rb @@ -21,7 +21,8 @@ class BatchApiClient ingest_differential_expression: ['Differential Expression'], render_expression_arrays: %w[Cluster], image_pipeline: %w[Cluster], - ingest_anndata: %w[AnnData] + ingest_anndata: %w[AnnData], + ingest_dot_plot_genes: ['Expression Matrix', 'MM Coordinate Matrix', 'AnnData'] }.freeze # default GCE machine_type diff --git a/app/models/dot_plot_gene_ingest_parameters.rb b/app/models/dot_plot_gene_ingest_parameters.rb new file mode 100644 index 0000000000..ae52f5d7af --- /dev/null +++ b/app/models/dot_plot_gene_ingest_parameters.rb @@ -0,0 +1,52 @@ +# class to hold parameters specific to ingest job for computing dot plot gene metrics +class DotPlotGeneIngestParameters + include ActiveModel::Model + include Parameterizable + + PARAMETER_NAME = '--ingest-dot-plot-genes' + + # cell_metadata_file: metadata file to source annotations + # cluster_file: clustering file with cells to use as control list for filtering and optional annotations + # cluster_group_id: BSON ID of ClusterGroup object for associations + # matrix_file_path: expression matrix with source data + # matrix_file_type: type of expression matrix (dense, sparse) + # gene_file (optional): genes/features file for sparse matrix + # barcode_file (optional): barcodes file for sparse matrix + # machine_type (optional): override for default ingest machine type (uses 'n2d-highmem-8') + PARAM_DEFAULTS = { + cell_metadata_file: nil, + cluster_file: nil, + cluster_group_id: nil, + matrix_file_path: nil, + matrix_file_type: nil, + gene_file: nil, + barcode_file: nil, + machine_type: 'n2d-highmem-8' + }.freeze + + # values that are available as methods but not as attributes (and not passed to command line) + NON_ATTRIBUTE_PARAMS = %i[machine_type].freeze + + attr_accessor(*PARAM_DEFAULTS.keys) + + validates :cell_metadata_file, :cluster_file, :cluster_group_id, :matrix_file_path, :matrix_file_type, presence: 
true + validates :cell_metadata_file, :cluster_file, :matrix_file_path, + format: { with: Parameterizable::GS_URL_REGEXP, message: 'is not a valid GS url' } + validates :matrix_file_type, inclusion: %w[dense mtx] + validates :machine_type, inclusion: Parameterizable::GCE_MACHINE_TYPES + validates :gene_file, :barcode_file, + presence: true, + format: { + with: Parameterizable::GS_URL_REGEXP, + message: 'is not a valid GS url' + }, + if: -> { matrix_file_type == 'mtx' } + + def initialize(attributes = nil) + super + end + + def cluster_group + ClusterGroup.find(cluster_group_id) + end +end diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb index bc017da5f4..e7c042737d 100644 --- a/app/models/ingest_job.rb +++ b/app/models/ingest_job.rb @@ -15,7 +15,7 @@ class IngestJob # valid ingest actions to perform VALID_ACTIONS = %i[ ingest_expression ingest_cluster ingest_cell_metadata ingest_anndata ingest_differential_expression ingest_subsample - differential_expression render_expression_arrays + ingest_dot_plot_genes differential_expression render_expression_arrays ].freeze # Mappings between actions & models (for cleaning up data on re-parses) @@ -24,7 +24,8 @@ class IngestJob ingest_cluster: ClusterGroup, ingest_cell_metadata: CellMetadatum, ingest_differential_expression: DifferentialExpressionResult, - ingest_subsample: ClusterGroup + ingest_subsample: ClusterGroup, + ingest_dot_plot_genes: DotPlotGene }.freeze # non-standard job actions where data is not being read from a file to insert into MongoDB @@ -37,7 +38,7 @@ class IngestJob # jobs that need parameters objects in order to launch correctly PARAMS_OBJ_REQUIRED = %i[ - differential_expression render_expression_arrays image_pipeline ingest_anndata + differential_expression render_expression_arrays image_pipeline ingest_anndata ingest_dot_plot_genes ].freeze # Name of pipeline submission running in GCP (from [BatchApiClient#run_job]) @@ -393,8 +394,11 @@ def poll_for_completion(run_at: 
1.minute.from_now) set_study_state_after_ingest study_file.invalidate_cache_by_file_type # clear visualization caches for file log_to_mixpanel - if action == :differential_expression + case action.to_sym + when :differential_expression subject = "Differential expression analysis for #{study_file.file_type} file: '#{study_file.upload_file_name}' has completed processing" + when :ingest_dot_plot_genes + subject = "Dot plot gene metrics for #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has completed" else subject = "#{study_file.file_type} file: '#{study_file.upload_file_name}' has completed parsing" end @@ -415,8 +419,11 @@ def poll_for_completion(run_at: 1.minute.from_now) log_error_messages log_to_mixpanel # log before queuing file for deletion to preserve properties # don't delete files or notify users if this is a 'special action', like DE or image pipeline jobs - if action == :differential_expression + case action.to_sym + when :differential_expression subject = "Error: Differential expression analysis for #{study_file.file_type} file: '#{study_file.upload_file_name}' has failed processing" + when :ingest_dot_plot_genes + subject = "Error: Dot plot gene metrics for #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed" else subject = "Error: #{study_file.file_type} file: '#{study_file.upload_file_name}' parse has failed" end @@ -450,12 +457,16 @@ def poll_for_completion(run_at: 1.minute.from_now) # in case of subsampling, only subsampled data cleanup is run and all other data is left in place # this reduces churn for study owners as full-resolution data is still valid def handle_ingest_failure(email_subject) - if action.to_sym == :ingest_subsample + case action.to_sym + when :ingest_subsample study_file.update(parse_status: 'parsed') # reset parse flag cluster_name = cluster_name_by_file_type cluster = ClusterGroup.find_by(name: cluster_name, study:, study_file:) cluster.find_subsampled_data_arrays&.delete_all 
cluster.update(subsampled: false, is_subsampling: false) + when :ingest_dot_plot_genes + cluster = params_object.cluster_group + DotPlotGene.where(study_id: study.id, cluster_group_id: cluster.id, study_file_id: study_file.id).delete_all else create_study_file_copy study_file.update(parse_status: 'failed') @@ -1144,6 +1155,7 @@ def extracted_raw_counts?(job) def skip_anndata_summary? study_file.has_anndata_summary? || action == :differential_expression || + action == :ingest_dot_plot_genes || should_retry? || (!failed? && action == :ingest_anndata) end @@ -1254,6 +1266,13 @@ def generate_success_email_array complete_pipeline_runtime = TimeDifference.between(*get_image_pipeline_timestamps).humanize message << "Image Pipeline image rendering completed for \"#{params_object.cluster}\"" message << "Complete runtime (data cache & image rendering): #{complete_pipeline_runtime}" + when :ingest_dot_plot_genes + cluster_group = params_object.cluster_group + genes = DotPlotGene.where(study:, study_file:, cluster_group:).count + message << "Dot plot gene preprocessing completed for cluster: #{cluster_group.name}" + message << "Total genes created: #{genes}" + else + message << "Ingest job completed for #{study_file.upload_file_name}" end message end diff --git a/app/models/search_facet.rb b/app/models/search_facet.rb index 9f84e83e1d..9aa7b1c1a6 100644 --- a/app/models/search_facet.rb +++ b/app/models/search_facet.rb @@ -405,7 +405,8 @@ def get_unique_filter_values(public_only: false) end return filter_map if is_numeric? - filter_map.uniq { |filter| [filter[:id]&.downcase, filter[:name]&.downcase] }.reject do |filter| + # some filters will have the same ontology id but different label, so take the first to prevent duplicates + filter_map.uniq { |filter| [filter[:id]&.downcase] }.reject do |filter| filter[:id].blank? || filter[:name].blank? 
end end diff --git a/app/models/study.rb b/app/models/study.rb index d6b690948f..b54e7ea43c 100644 --- a/app/models/study.rb +++ b/app/models/study.rb @@ -143,6 +143,8 @@ def by_name(name) end end + has_many :dot_plot_genes, dependent: :delete_all + has_many :study_shares, dependent: :destroy do def can_edit where(permission: 'Edit').map(&:email) diff --git a/config/application.rb b/config/application.rb index 087e61af9b..cd3478775c 100644 --- a/config/application.rb +++ b/config/application.rb @@ -29,7 +29,7 @@ class Application < Rails::Application config.middleware.use Rack::Brotli # Docker image for file parsing via scp-ingest-pipeline - config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.42.0' + config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.43.0' # Docker image for image pipeline jobs config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043' diff --git a/test/lib/request_utils_test.rb b/test/lib/request_utils_test.rb index 4cce0ccd2c..5ad78457ec 100644 --- a/test/lib/request_utils_test.rb +++ b/test/lib/request_utils_test.rb @@ -195,6 +195,31 @@ class RequestUtilsTest < ActionDispatch::IntegrationTest end end + test 'should construct cluster file url' do + anndata_file = FactoryBot.create(:ann_data_file, + name: 'anndata.h5ad', + study: @private_study, + cell_input: %w[A B C D], + coordinate_input: [ + { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } } + ]) + cluster_group_umap = ClusterGroup.find_by(study_file: anndata_file, name: 'umap') + expected_url = "gs://#{@private_study.bucket_id}/_scp_internal/anndata_ingest/" \ + "#{@private_study.accession}_#{anndata_file.id}/h5ad_frag.cluster.X_umap.tsv.gz" + assert_equal expected_url, RequestUtils.cluster_file_url(cluster_group_umap) + cluster_file = FactoryBot.create(:cluster_file, + name: 'cluster.tsv.gz', + study: @private_study, + cell_input: { + x: [1, 2, 3], + y: [1, 2, 3], + 
cells: %w[A B C] + }) + expected_cluster_url = "gs://#{@private_study.bucket_id}/cluster.tsv.gz" + cluster_group = ClusterGroup.find_by(study_file: cluster_file, name: 'cluster.tsv.gz') + assert_equal expected_cluster_url, RequestUtils.cluster_file_url(cluster_group) + end + test 'should properly format incorrect study url' do identifier = "#{@public_study.accession}/#{@public_study.url_safe_name}" base_path = "/single_cell/study/#{identifier}" diff --git a/test/models/dot_plot_gene_ingest_parameters_test.rb b/test/models/dot_plot_gene_ingest_parameters_test.rb new file mode 100644 index 0000000000..470ec8eceb --- /dev/null +++ b/test/models/dot_plot_gene_ingest_parameters_test.rb @@ -0,0 +1,66 @@ +require 'test_helper' + +class DotPlotGeneIngestParametersTest < ActiveSupport::TestCase + before(:all) do + cluster_group_id = BSON::ObjectId.new + @dense_options = { + cell_metadata_file: 'gs://test_bucket/metadata.tsv', + cluster_file: 'gs://test_bucket/cluster.tsv', + cluster_group_id:, + matrix_file_path: 'gs://test_bucket/dense.tsv', + matrix_file_type: 'dense', + } + + @sparse_options = { + cell_metadata_file: 'gs://test_bucket/metadata.tsv', + cluster_file: 'gs://test_bucket/cluster.tsv', + cluster_group_id:, + matrix_file_path: 'gs://test_bucket/sparse.tsv', + matrix_file_type: 'mtx', + gene_file: 'gs://test_bucket/genes.tsv', + barcode_file: 'gs://test_bucket/barcodes.tsv' + } + + @anndata_options = { + cell_metadata_file: 'gs://test_bucket/metadata.tsv', + cluster_file: 'gs://test_bucket/cluster.tsv', + cluster_group_id:, + matrix_file_path: 'gs://test_bucket/matrix.h5ad', + matrix_file_type: 'mtx', + gene_file: 'gs://test_bucket/genes.tsv', + barcode_file: 'gs://test_bucket/barcodes.tsv' + } + end + + test 'should create and validate parameters' do + [@dense_options, @sparse_options, @anndata_options].each do |options| + params = DotPlotGeneIngestParameters.new(**options) + assert params.valid? 
+ assert_equal DotPlotGeneIngestParameters::PARAM_DEFAULTS[:machine_type], params.machine_type + if options[:matrix_file_type] == 'mtx' + assert params.gene_file.present? + assert params.barcode_file.present? + else + assert params.gene_file.nil? + assert params.barcode_file.nil? + end + end + end + + test 'should find associated cluster group' do + user = FactoryBot.create(:user, test_array: @@users_to_clean) + study = FactoryBot.create(:detached_study, + name_prefix: 'DotPlotGeneIngestParameters Test', + user:, + test_array: @@studies_to_clean) + FactoryBot.create(:cluster_file, + name: 'cluster.txt', + study:, + cell_input: { x: [1, 4, 6], y: [7, 5, 3], cells: %w[A B C] } + ) + cluster_group = ClusterGroup.find_by(study:) + new_options = @dense_options.dup.merge(cluster_group_id: cluster_group.id) + params = DotPlotGeneIngestParameters.new(**new_options) + assert_equal cluster_group, params.cluster_group + end +end diff --git a/test/services/dot_plot_service_test.rb b/test/services/dot_plot_service_test.rb index cbcf05ccf4..7846cff709 100644 --- a/test/services/dot_plot_service_test.rb +++ b/test/services/dot_plot_service_test.rb @@ -29,6 +29,10 @@ class DotPlotServiceTest < ActiveSupport::TestCase @cluster_group = @study.cluster_groups.first end + teardown do + DotPlotGene.delete_all + end + test 'should determine study eligibility for preprocessing' do assert DotPlotService.study_eligible?(@study) empty_study = FactoryBot.create(:detached_study, @@ -59,7 +63,86 @@ class DotPlotServiceTest < ActiveSupport::TestCase assert_empty DotPlotService.study_processed_matrices(empty_study) end + test 'should validate study for dot plot preprocessing' do + DotPlotService.validate_study(@study, @cluster_group) # should not raise error + empty_study = FactoryBot.create(:detached_study, + name_prefix: 'Empty DotPlot', + user: @user, + test_array: @@studies_to_clean) + assert_raise ArgumentError do + DotPlotService.validate_study(empty_study, ClusterGroup.new) + end + end + + 
test 'should get dense parameters for dot plot gene ingest job' do + params = DotPlotService.create_params_object(@cluster_group, @expression_file, @metadata_file) + assert_equal params.matrix_file_type, 'dense' + assert_equal params.cell_metadata_file, @metadata_file.gs_url + assert_equal params.cluster_file, @cluster_file.gs_url + assert_equal params.matrix_file_path, @expression_file.gs_url + end + + test 'should get sparse parameters for dot plot gene ingest job' do + study = FactoryBot.create(:detached_study, + name_prefix: 'Sparse DotPlotService Test Study', + user: @user, + test_array: @@studies_to_clean) + cluster_file = FactoryBot.create(:cluster_file, + name: 'cluster_example_sparse.txt', + study:, + cell_input: { x: [1, 2, 3], y: [4, 5, 6] }) + metadata_file = FactoryBot.create(:metadata_file, name: 'metadata.txt', study:) + matrix = FactoryBot.create(:expression_file, + name: 'matrix.mtx', + file_type: 'MM Coordinate Matrix', + study:) + gene_file = FactoryBot.create(:study_file, name: 'genes.tsv', file_type: '10X Genes File', study:) + barcode_file = FactoryBot.create(:study_file, name: 'barcodes.tsv', file_type: '10X Barcodes File', study:) + bundle = StudyFileBundle.new(study:, bundle_type: matrix.file_type) + bundle.add_files(matrix, gene_file, barcode_file) + bundle.save! 
+ cluster_group = study.cluster_groups.first + params = DotPlotService.create_params_object(cluster_group, matrix, metadata_file) + assert_equal params.matrix_file_type, 'mtx' + assert_equal params.cell_metadata_file, metadata_file.gs_url + assert_equal params.cluster_file, cluster_file.gs_url + assert_equal params.matrix_file_path, matrix.gs_url + end + + test 'should get anndata parameters for dot plot gene ingest job' do + study = FactoryBot.create(:detached_study, + name_prefix: 'AnnData DotPlotService Test Study', + user: @user, + test_array: @@studies_to_clean) + anndata_file = FactoryBot.create(:ann_data_file, + name: 'matrix.h5ad', + study:, + cell_input: %w[A B C D], + coordinate_input: [ + { umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } } + ]) + cluster_group = study.cluster_groups.first + params = DotPlotService.create_params_object(cluster_group, anndata_file, anndata_file) + assert_equal 'mtx', params.matrix_file_type + assert_equal RequestUtils.data_fragment_url(anndata_file, 'metadata'), + params.cell_metadata_file + assert_equal RequestUtils.data_fragment_url(anndata_file, 'cluster', file_type_detail: 'X_umap'), + params.cluster_file + assert_equal RequestUtils.data_fragment_url(anndata_file, 'matrix', file_type_detail: 'processed'), + params.matrix_file_path + assert_equal RequestUtils.data_fragment_url(anndata_file, 'features', file_type_detail: 'processed'), + params.gene_file + assert_equal RequestUtils.data_fragment_url(anndata_file, 'barcodes', file_type_detail: 'processed'), + params.barcode_file + end + test 'should run preprocess expression job' do - assert DotPlotService.run_preprocess_expression_job(@study, @cluster_group, @metadata_file, @expression_file) + job_mock = Minitest::Mock.new + job_mock.expect(:push_remote_and_launch_ingest, Delayed::Job.new) + mock = Minitest::Mock.new + mock.expect(:delay, job_mock) + IngestJob.stub :new, mock do + assert DotPlotService.run_process_dot_plot_genes(@study, @cluster_group, @user) + end end end