diff --git a/Gemfile.lock b/Gemfile.lock index 5961226b4..6ddfe014f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -339,11 +339,11 @@ GEM net-protocol netrc (0.11.0) nio4r (2.7.4) - nokogiri (1.18.4-arm64-darwin) + nokogiri (1.18.8-arm64-darwin) racc (~> 1.4) - nokogiri (1.18.4-x86_64-darwin) + nokogiri (1.18.8-x86_64-darwin) racc (~> 1.4) - nokogiri (1.18.4-x86_64-linux-gnu) + nokogiri (1.18.8-x86_64-linux-gnu) racc (~> 1.4) oauth2 (1.4.7) faraday (>= 0.8, < 2.0) diff --git a/app/javascript/lib/validation/ontology-validation.js b/app/javascript/lib/validation/ontology-validation.js index 1d7661844..fe3250622 100644 --- a/app/javascript/lib/validation/ontology-validation.js +++ b/app/javascript/lib/validation/ontology-validation.js @@ -12,9 +12,7 @@ import { decompressSync, strFromU8 } from 'fflate' -import { - metadataSchema, REQUIRED_CONVENTION_COLUMNS -} from './shared-validation' +import { metadataSchema } from './shared-validation' // TODO: Replace "development" with "main" after next ingest release const ONTOLOGY_BASE_URL = @@ -136,24 +134,32 @@ export async function fetchOntologies() { return ontologies } -/** Get lowercase shortnames for all required ontologies */ -function getOntologyShortNames() { - let requiredOntologies = [] - - // Validate IDs for species, organ, disease, and library preparation protocol - for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { - const column = REQUIRED_CONVENTION_COLUMNS[i] - if (!column.endsWith('__ontology_label')) {continue} - const key = column.split('__ontology_label')[0] - const ontologies = getAcceptedOntologies(key, metadataSchema) - requiredOntologies = requiredOntologies.concat(ontologies) - } +/** Get lowercase shortnames for all supported ontologies */ +export function getOntologyShortNames() { + let supportedOntologies = [] - requiredOntologies = Array.from( - new Set(requiredOntologies.map(o => o.toLowerCase())) - ) + // get all ontology-based properties, ignoring organ_region as it isn't minified + const properties = getOntologyBasedProps() + for (let i = 0; i < properties.length; i++) { + const prop = properties[i] + const ontologies = getAcceptedOntologies(prop, metadataSchema) + supportedOntologies = supportedOntologies.concat(ontologies) + } + return Array.from(new Set(supportedOntologies.map(o => o.toLowerCase()))) +} - return requiredOntologies +/** get all metadata properties that are ontology-based */ +export function getOntologyBasedProps() { + const ontologyProps = [] + // ignore organ_region as it isn't a supported minified ontology + const properties = Object.keys(metadataSchema.properties).filter(p => p !== 'organ_region') + for (let i = 0; i < properties.length; i++) { + const prop = properties[i] + if (metadataSchema.properties[prop].ontology) { + ontologyProps.push(prop) + } + } + return ontologyProps } /** @@ -168,7 +174,7 @@ export function getAcceptedOntologies(key, metadataSchema) { const acceptedOntologies = olsUrls?.split(',').map(url => url.split('/').slice(-1)[0].toUpperCase()) - if (acceptedOntologies.includes('NCBITAXON')) { + if (acceptedOntologies && acceptedOntologies.includes('NCBITAXON')) { acceptedOntologies.push('NCBITaxon') } diff --git a/app/javascript/lib/validation/shared-validation.js b/app/javascript/lib/validation/shared-validation.js index 8f902120d..9bc50b00a 100644 --- a/app/javascript/lib/validation/shared-validation.js +++ b/app/javascript/lib/validation/shared-validation.js @@ -313,3 +313,13 @@ export function timeOutCSFV(chunker) { } return issues } + +/** get ontology 
shortname from an identifier */ +export function getOntologyShortNameLc(identifier) { + return identifier.split(/[_:]/)[0].toLowerCase() +} + +export function getLabelSuffixForOntology(identifier) { + const shortName = getOntologyShortNameLc(identifier) + return shortName === 'uo' ? '_label' : '__ontology_label' +} diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 7d71a0ac7..4b5329fdf 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -3,10 +3,12 @@ import { openH5File } from 'hdf5-indexed-reader' import { getOAuthToken } from '~/lib/scp-api' import { validateUnique, validateRequiredMetadataColumns, - validateAlphanumericAndUnderscores, + validateAlphanumericAndUnderscores, getOntologyShortNameLc, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' -import { getAcceptedOntologies, fetchOntologies } from './ontology-validation' +import { fetchOntologies, getOntologyBasedProps, getAcceptedOntologies } from './ontology-validation' + +const ONTOLOGY_PROPS = getOntologyBasedProps() /** Get ontology ID values for key in AnnData file */ async function getOntologyIds(key, hdf5File) { @@ -26,7 +28,7 @@ async function getOntologyIds(key, hdf5File) { if (internalCategories) { resolvedCategories = await Promise.all(internalCategories.values) } - const group = resolvedCategories.find(o => o.name.endsWith(key)) + const group = resolvedCategories.find(o => findMatchingGroup(o, key)) if (group) { let categories if (internalCategories) { @@ -40,6 +42,11 @@ async function getOntologyIds(key, hdf5File) { return ontologyIds } +/** find a group in /obs based on exact name match */ +export function findMatchingGroup(category, key) { + return category.name.split('/').slice(-1)[0] === key +} + /** Get annotation headers for a key (e.g. obs) from an HDF5 file */ async function getAnnotationHeaders(key, hdf5File) { const obsGroup = await hdf5File.get(key) @@ -122,7 +129,7 @@ export function checkOntologyIdFormat(key, ontologyIds) { } /** Validate author's annotation labels and IDs match those in ontologies */ -async function checkOntologyLabelsAndIds(key, ontologies, groups) { +export async function checkOntologyLabelsAndIds(key, ontologies, groups) { const [ids, idIndexes, labels, labelIndexes] = groups const issues = [] @@ -138,7 +145,7 @@ async function checkOntologyLabelsAndIds(key, ontologies, groups) { rawUniques.map(r => { let [id, label] = r.split(' || ') - const ontologyShortNameLc = id.split(/[_:]/)[0].toLowerCase() + const ontologyShortNameLc = getOntologyShortNameLc(id) const ontology = ontologies[ontologyShortNameLc] if (id.includes(':')) { @@ -146,7 +153,6 @@ async function checkOntologyLabelsAndIds(key, ontologies, groups) { const idParts = id.split(':') id = `${idParts[0]}_${idParts[1]}` } - if (!(id in ontology)) { // Register invalid ontology ID const msg = `Invalid ontology ID: ${id}` @@ -174,9 +180,10 @@ async function checkOntologyLabelsAndIds(key, ontologies, groups) { } /** Get ontology ID values for key in AnnData file */ -async function getOntologyIdsAndLabels(requiredName, hdf5File) { +export async function getOntologyIdsAndLabels(columnName, hdf5File) { const obs = await hdf5File.get('obs') const obsValues = await Promise.all(obs.values) + const isRequired = REQUIRED_CONVENTION_COLUMNS.includes(columnName) // Old versions of the AnnData spec used __categories as an obs. 
// However, in new versions (since before 2023-01-23) of AnnData spec, @@ -192,11 +199,14 @@ async function getOntologyIdsAndLabels(requiredName, hdf5File) { return null } - const idKey = requiredName - const labelKey = `${requiredName}__ontology_label` + const idKey = columnName + const labelKey = `${columnName}__ontology_label` + + const idGroup = obsValues.find(o => findMatchingGroup(o, idKey)) + const labelGroup = obsValues.find(o => findMatchingGroup(o, labelKey)) - const idGroup = obsValues.find(o => o.name.endsWith(idKey)) - const labelGroup = obsValues.find(o => o.name.endsWith(labelKey)) + // exit when optional metadata isn't found, like cell_type + if (!idGroup && !isRequired) { return } // AnnData organizes each "obs" annotation (e.g. disease__ontology_label, // sex) into a container with a `categories` frame and a `code` frame. @@ -231,15 +241,13 @@ async function validateOntologyLabelsAndIds(hdf5File) { const ontologies = await fetchOntologies() // Validate IDs for species, organ, disease, and library preparation protocol - for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { - const column = REQUIRED_CONVENTION_COLUMNS[i] - if (!column.endsWith('__ontology_label')) {continue} - const key = column.split('__ontology_label')[0] - const groups = await getOntologyIdsAndLabels(key, hdf5File) + for (let i = 0; i < ONTOLOGY_PROPS.length; i++) { + const column = ONTOLOGY_PROPS[i] + const groups = await getOntologyIdsAndLabels(column, hdf5File) if (groups) { issues = issues.concat( - await checkOntologyLabelsAndIds(key, ontologies, groups) + await checkOntologyLabelsAndIds(column, ontologies, groups) ) } } @@ -253,14 +261,12 @@ async function validateOntologyIdFormat(hdf5File) { let issues = [] // Validate IDs for species, organ, disease, and library preparation protocol - for (let i = 0; i < REQUIRED_CONVENTION_COLUMNS.length; i++) { - const column = REQUIRED_CONVENTION_COLUMNS[i] - if (!column.endsWith('__ontology_label')) {continue} - const key = column.split('__ontology_label')[0] - const ontologyIds = await getOntologyIds(key, hdf5File) + for (let i = 0; i < ONTOLOGY_PROPS.length; i++) { + const column = ONTOLOGY_PROPS[i] + const ontologyIds = await getOntologyIds(column, hdf5File) issues = issues.concat( - checkOntologyIdFormat(key, ontologyIds) + checkOntologyIdFormat(column, ontologyIds) ) } diff --git a/app/javascript/lib/validation/validate-file-content.js b/app/javascript/lib/validation/validate-file-content.js index b3e42ebc1..535dacab8 100644 --- a/app/javascript/lib/validation/validate-file-content.js +++ b/app/javascript/lib/validation/validate-file-content.js @@ -22,7 +22,8 @@ import { } from './shared-validation' import { parseDifferentialExpressionFile } from './validate-differential-expression' import { parseAnnDataFile } from './validate-anndata' - +import { fetchOntologies, getOntologyBasedProps } from '~/lib/validation/ontology-validation' +import { getOntologyShortNameLc, getLabelSuffixForOntology } from './shared-validation' /** * Gzip decompression requires reading the whole file, given the current @@ -36,7 +37,7 @@ const MAX_GZIP_FILESIZE = 50 * oneMiB /** File extensions / suffixes that indicate content must be gzipped */ const EXTENSIONS_MUST_GZIP = ['gz', 'bam', 'tbi', 'csi'] - +const ONTOLOGY_PROPS = getOntologyBasedProps() /** * Helper function to verify first pair of headers is NAME or TYPE @@ -217,7 +218,11 @@ export async function parseMetadataFile(chunker, mimeType, fileOptions) { const { headers, delimiter } = await 
getParsedHeaderLines(chunker, mimeType) let issues = validateCapFormat(headers) issues = issues.concat(validateNoMetadataCoordinates(headers)) + let ontologies + // keep track of ontology-based errors already seen to avoid reporting duplicates + const knownErrors = [] if (fileOptions.use_metadata_convention) { + ontologies = await fetchOntologies() issues = issues.concat(validateRequiredMetadataColumns(headers)) } @@ -232,12 +237,87 @@ export async function parseMetadataFile(chunker, mimeType, fileOptions) { issues = issues.concat(validateUniqueCellNamesWithinFile(line, isLastLine, dataObj)) issues = issues.concat(validateMetadataLabelMatches(headers, line, isLastLine, dataObj)) issues = issues.concat(validateGroupColumnCounts(headers, line, isLastLine, dataObj)) + if (fileOptions.use_metadata_convention) { + issues = issues.concat(validateConventionTerms(headers, line, ontologies, knownErrors)) + } // add other line-by-line validations here } }) return { issues, delimiter, numColumns: headers[0].length } } +/** validate all ontology-based convention terms in a given line */ +export function validateConventionTerms(headers, line, ontologies, knownErrors) { + let issues = [] + const metadataHeaders = headers[0] + for (let i = 0; i < metadataHeaders.length; i++) { + const header = metadataHeaders[i] + if (ONTOLOGY_PROPS.includes(header)) { + const ontologyId = line[i] + const labelHeader = `${header}${getLabelSuffixForOntology(ontologyId)}` + const labelIdx = metadataHeaders.indexOf(labelHeader) + const label = line[labelIdx] + issues = issues.concat(validateOntologyTerm(header, ontologyId, label, ontologies, knownErrors)) + } + } + return issues +} + +/** validate a single ontology ID against stored ontologies and return issues */ +export function validateOntologyTerm(prop, ontologyId, label, ontologies, knownErrors) { + const issues = [] + const ontologyShortNameLc = getOntologyShortNameLc(ontologyId) + const ontology = ontologies[ontologyShortNameLc] + + if (ontologyId.includes(':')) { + // Convert colon to underscore for ontology lookup + const idParts = ontologyId.split(':') + ontologyId = `${idParts[0]}_${idParts[1]}` + } + + let errorIdentifier + let issue + + if (!ontology) { + errorIdentifier = `${ontologyId}-label-lookup-error` + const accepted = Object.keys(ontologies).join(', ') + const msg = + `Ontology ID "${ontologyId}" ` + + `is not among accepted ontologies (${accepted}) ` + + `for key "${prop}"` + + issue = ['error', 'ontology:label-lookup-error', msg] + } else if (!(ontologyId in ontology)) { + // Register invalid ontology ID + const msg = `Invalid ontology ID: ${ontologyId}` + errorIdentifier = `${ontologyId}-invalid-id` + issue = [ + 'error', 'ontology:label-lookup-error', msg, + { subtype: 'ontology:invalid-id' } + ] + } else { + const validLabels = ontology[ontologyId] + + if (!(validLabels.includes(label))) { + errorIdentifier = `${ontologyId}-label-lookup-error` + // Register invalid ontology label + const prettyLabels = validLabels.join(', ') + const validLabelsClause = `Valid labels for ${ontologyId}: ${prettyLabels}` + const msg = `Invalid ${prop} label "${label}". 
${validLabelsClause}` + issue = [ + 'error', 'ontology:label-not-match-id', msg, + { subtype: 'ontology:invalid-label' } + ] + } + } + // only store unique instances of errors since we're validating line by line + if (issue && knownErrors.indexOf(errorIdentifier) < 0) { + issues.push(issue) + knownErrors.push(errorIdentifier) + } + return issues +} + /** parse a cluster file, and return an array of issues, along with file parsing info */ export async function parseClusterFile(chunker, mimeType) { const { headers, delimiter } = await getParsedHeaderLines(chunker, mimeType) diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb index 146de3faa..feec299ba 100644 --- a/app/models/ingest_job.rb +++ b/app/models/ingest_job.rb @@ -1044,7 +1044,7 @@ def log_to_mixpanel mixpanel_log_props = get_job_analytics # log job properties to Mixpanel MetricsService.log(mixpanel_event_name, mixpanel_log_props, user) - report_anndata_summary if study_file.is_viz_anndata? && !%i[ingest_anndata differential_expression].include?(action) + report_anndata_summary if study_file.is_viz_anndata? end # set a mixpanel event name based on action @@ -1087,7 +1087,7 @@ def anndata_summary_props commands = client.get_job_command_line(job:) commands.detect { |c| c == '--extract' } || client.job_error(job.name).present? end.count - num_files_extracted += 1 if extracted_raw_counts?(initial_extract) + num_files_extracted += 1 if extracted_raw_counts?(initial_extract) && job_status == 'success' # event properties for Mixpanel summary event { perfTime: job_perftime, @@ -1115,10 +1115,20 @@ def extracted_raw_counts?(job) extract_params.include?('raw_counts') end + # determine if this job qualifies for sending an ingestSummary event + # will return true if a summary was already sent, this is a DE job, the job will be retried, or + # this is a successful AnnData extract (meaning downstream jobs are still running) + def skip_anndata_summary? + study_file.has_anndata_summary? || + action == :differential_expression || + should_retry? || + (!failed? && action == :ingest_anndata) + end + # report a summary of all AnnData extraction for this file to Mixpanel, if this is the last job def report_anndata_summary study_file.reload - return false if study_file.has_anndata_summary? # don't bother checking if summary is already sent + return false if skip_anndata_summary? 
file_identifier = "#{study_file.upload_file_name} (#{study_file.id})" Rails.logger.info "Checking AnnData summary for #{file_identifier} after #{action}" diff --git a/package.json b/package.json index 434ab4617..73291a722 100644 --- a/package.json +++ b/package.json @@ -105,7 +105,7 @@ "prop-types": "^15.7.2", "react-select-event": "^5.3.0", "stylelint-prettier": "^1.1.2", - "vite": "^4.5.10", + "vite": "^4.5.13", "vite-plugin-ruby": "^3.0.5" } } diff --git a/test/js/lib/ontology-validation.test.js b/test/js/lib/ontology-validation.test.js index f0f06e171..b461cb469 100644 --- a/test/js/lib/ontology-validation.test.js +++ b/test/js/lib/ontology-validation.test.js @@ -1,7 +1,8 @@ const fetch = require('node-fetch') +import { metadataSchema } from 'lib/validation/shared-validation' import { - fetchOntologies + fetchOntologies, getOntologyShortNames, getOntologyBasedProps } from 'lib/validation/ontology-validation' import { @@ -9,6 +10,7 @@ import { } from './node-web-api' describe('Client-side file validation for AnnData', () => { + const expectedOntologyNames = ['cl', 'uo', 'mondo', 'pato', 'hancestro', 'efo', 'uberon', 'ncbitaxon'] beforeAll(() => { global.fetch = fetch @@ -20,9 +22,21 @@ describe('Client-side file validation for AnnData', () => { it('Parses minified ontologies', async () => { const ontologies = await fetchOntologies() - const expectedOntologyNames = ['mondo', 'pato', 'efo', 'uberon', 'ncbitaxon'] expect(Object.keys(ontologies)).toEqual(expectedOntologyNames) const expectedSpeciesNames = ['Homo sapiens', 'human'] expect(ontologies.ncbitaxon['NCBITaxon_9606']).toEqual(expectedSpeciesNames) }) + + it('finds all ontology-based metadata properties', () => { + const propNames = Object.keys(metadataSchema.properties).filter(p => { + return p !== 'organ_region' && metadataSchema.properties[p].ontology + }) + const ontologyProps = getOntologyBasedProps() + expect(propNames).toEqual(ontologyProps) + }) + + it('loads all ontology shortnames', () => { + const shortNames = getOntologyShortNames() + expect(shortNames).toEqual(expectedOntologyNames) + }) }) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index 71232650a..f2e686c87 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -1,10 +1,26 @@ import { - getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat + getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat, getOntologyIdsAndLabels, + checkOntologyLabelsAndIds, findMatchingGroup } from 'lib/validation/validate-anndata' +import { fetchOntologies } from '~/lib/validation/ontology-validation' +const fetch = require('node-fetch') +import { + nodeCaches, nodeHeaders, nodeRequest, nodeResponse +} from './node-web-api' const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata' describe('Client-side file validation for AnnData', () => { + beforeAll(() => { + jest.setTimeout(10000) + global.fetch = fetch + + global.caches = nodeCaches; + global.Response = nodeResponse + global.Request = nodeRequest + global.Headers = nodeHeaders + }) + it('Parses AnnData headers', async () => { const url = `${BASE_URL}/valid.h5ad` const expectedHeaders = [ @@ -78,6 +94,35 @@ describe('Client-side file validation for AnnData', () => { expect(parseResults.issues[0]).toEqual(expectedIssue) }) + it('validates ontology ids for given column', async () => { + const ontologies = await fetchOntologies() + const url = 
`${BASE_URL}/valid.h5ad` + const remoteProps = { url } + const hdf5File = await getHdf5File(url, remoteProps) + const key = 'disease' + const groups = await getOntologyIdsAndLabels(key, hdf5File) + let issues = await checkOntologyLabelsAndIds(key, ontologies, groups) + expect(issues).toHaveLength(0) + }) + + it('finds invalid ontology entries for a given column', async () => { + const ontologies = await fetchOntologies() + const url = `${BASE_URL}/invalid_disease_label.h5ad` + const remoteProps = { url } + const hdf5File = await getHdf5File(url, remoteProps) + const key = 'disease' + const groups = await getOntologyIdsAndLabels(key, hdf5File) + let issues = await checkOntologyLabelsAndIds(key, ontologies, groups) + expect(issues).toHaveLength(1) + }) + + it('finds the correct obs group based on name', () => { + const validGroup = { name: '/obs/cell_type' } + const invalidGroup = { name: '/obs/author_cell_type' } + expect(findMatchingGroup(validGroup, 'cell_type')).toBeTruthy() + expect(findMatchingGroup(invalidGroup, 'cell_type')).not.toBeTruthy() + }) + // TODO (SCP-5813): Uncomment this test upon completing "Enable ontology validation for remote AnnData" // // it('Parses AnnData rows and reports invalid ontology labels', async () => { diff --git a/test/js/lib/validate-file-content.test.js b/test/js/lib/validate-file-content.test.js index 7f888b6c1..dc7e3c050 100644 --- a/test/js/lib/validate-file-content.test.js +++ b/test/js/lib/validate-file-content.test.js @@ -1,9 +1,14 @@ import React from 'react' +const fetch = require('node-fetch') import { render, screen } from '@testing-library/react' import '@testing-library/jest-dom/extend-expect' import ValidateFile from 'lib/validation/validate-file' -import { REQUIRED_CONVENTION_COLUMNS } from 'lib/validation/shared-validation' +import { fetchOntologies } from 'lib/validation/ontology-validation' +import { validateConventionTerms, validateOntologyTerm } from 'lib/validation/validate-file-content' +import { + REQUIRED_CONVENTION_COLUMNS, getOntologyShortNameLc, getLabelSuffixForOntology +} from 'lib/validation/shared-validation' import { getLogProps } from 'lib/validation/log-validation' import ValidationMessage from 'components/validation/ValidationMessage' import * as MetricsApi from 'lib/metrics-api' @@ -13,7 +18,20 @@ import { createMockFile } from './file-mock-utils' const validateLocalFile = ValidateFile.validateLocalFile +import { + nodeCaches, nodeHeaders, nodeRequest, nodeResponse +} from './node-web-api' + describe('Client-side file validation', () => { + beforeAll(() => { + global.fetch = fetch + + 
global.caches = nodeCaches; + global.Response = nodeResponse + global.Request = nodeRequest + global.Headers = nodeHeaders + }) + beforeEach(() => { jest .spyOn(UserProvider, 'getFeatureFlagsWithDefaults') @@ -518,3 +544,102 @@ describe('Client-side file validation feature flag is false', () => { }) } ) + +describe('validates file contents against minified ontologies', () => { + beforeAll(() => { + global.fetch = fetch + + global.caches = nodeCaches; + global.Response = nodeResponse + global.Request = nodeRequest + global.Headers = nodeHeaders + }) + + beforeEach(() => { + jest + .spyOn(UserProvider, 'getFeatureFlagsWithDefaults') + .mockReturnValue({ + clientside_validation: true + }) + }) + + it('validates classic metadata file', async () => { + const content = [ + "NAME\tbiosample_id\tCellID\tdisease\tdisease__ontology_label\tdonor_id\tlibrary_preparation_protocol" + + "\tlibrary_preparation_protocol__ontology_label\torgan\torgan__ontology_label\tsex\tspecies\tspecies__ontology_label", + "TYPE\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup", + "CELL_0001\tid1\tcell1\tMONDO_0000001\tdisease or disorder\tdonor1\tEFO_0008919\tSeq-Well\tUBERON_0001913" + + "\tmilk\tfemale\tNCBITaxon_9606\tHomo sapiens" + ] + const file = createMockFile({ + fileName: 'metadata_valid.tsv', + content: content.join("\n") + }) + const [{ errors }] = await validateLocalFile(file, { file_type: 'Metadata', use_metadata_convention: true }) + expect(errors).toHaveLength(0) + }) + + it('finds ontology error in classic metadata file', async () => { + const content = [ + "NAME\tbiosample_id\tCellID\tdisease\tdisease__ontology_label\tdonor_id\tlibrary_preparation_protocol" + + "\tlibrary_preparation_protocol__ontology_label\torgan\torgan__ontology_label\tsex\tspecies\tspecies__ontology_label", + "TYPE\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup\tgroup", + "CELL_0001\tid1\tcell1\tMONDO_0000001\tdisease or disorder\tdonor1\tEFO_0008919\tnot label\tUBERON_0001913" + + "\tmilk\tfemale\tNCBITaxon_9606\tfoo" + ] + const file = createMockFile({ + fileName: 'metadata_valid.tsv', + content: content.join("\n") + }) + const [{ errors }] = await validateLocalFile(file, { file_type: 'Metadata', use_metadata_convention: true }) + expect(errors).toHaveLength(2) + }) + + it('validates single line or term from a metadata file', async() => { + const ontologies = await fetchOntologies() + const headers = [ + [ "NAME", "species", "species__ontology_label","disease", "disease__ontology_label"], + ["TYPE", "group", "group", "group", "group"] + ] + // validate whole line + const line = ["CELL_0001", "NCBITaxon_9606", "Homo sapiens", "MONDO_0000001", "disease or disorder"] + let knownErrors = [] + let issues = validateConventionTerms(headers, line, ontologies, knownErrors) + expect(issues).toHaveLength(0) + const badLine = ["CELL_0001", "NCBITaxon_9606", "not the label","MONDO_0000001", "also not label"] + issues = validateConventionTerms(headers, badLine, ontologies, knownErrors) + expect(issues.length).toBe(2) + expect(knownErrors.length).toBe(2) + // validate single term + let prop = 'library_preparation_protocol' + let ontologyId = 'EFO_0008919' + let label = 'Seq-Well' + knownErrors = [] + issues = validateOntologyTerm(prop, ontologyId, label, ontologies, knownErrors) + expect(issues.length).toBe(0) + prop = 'cell_type' + ontologyId = 'CL_0000066' + label = 'bad label' + issues = validateOntologyTerm(prop, ontologyId, label, ontologies, knownErrors) + 
expect(issues.length).toBe(1) + expect(knownErrors.length).toBe(1) + prop = 'organ' + ontologyId = 'foobar' + label = 'bad label' + issues = validateOntologyTerm(prop, ontologyId, label, ontologies, knownErrors) + expect(issues.length).toBe(1) + expect(knownErrors.length).toBe(2) + }) + + it('gets ontology shortname from ID', () => { + const ontologyId = "EFO_0008919" + expect("efo").toEqual(getOntologyShortNameLc(ontologyId)) + }) + + it('gets label suffix depending on ontology', () => { + const efoId = "EFO_0008919" + expect("__ontology_label").toEqual(getLabelSuffixForOntology(efoId)) + const uoId = "UO_0000036" + expect("_label").toEqual(getLabelSuffixForOntology(uoId)) + }) +}) diff --git a/test/js/upload-wizard/upload-new-study.test.js b/test/js/upload-wizard/upload-new-study.test.js index 472adf0fb..a7f1d495f 100644 --- a/test/js/upload-wizard/upload-new-study.test.js +++ b/test/js/upload-wizard/upload-new-study.test.js @@ -12,9 +12,10 @@ import { import { renderWizardWithStudy, getSelectByLabelText, saveButton, mockCreateStudyFile } from './upload-wizard-test-utils' -import fetch from 'node-fetch' +const fetch = require('node-fetch') import { setMetricsApiMockFlag } from 'lib/metrics-api' import { getTokenExpiry } from './upload-wizard-test-utils' +import { nodeCaches, nodeHeaders, nodeRequest, nodeResponse } from '../lib/node-web-api' const processedFileName = 'example_processed_dense.txt' const rawCountsFileName = 'example_raw_counts.txt' @@ -23,8 +24,13 @@ describe('creation of study files', () => { beforeAll(() => { jest.restoreAllMocks() // This test is long--running all steps in series as if it was a user uploading a new study from scratch--so allow extra time - jest.setTimeout(10000) + jest.setTimeout(20000) global.fetch = fetch + + global.caches = nodeCaches; + global.Response = nodeResponse + global.Request = nodeRequest + global.Headers = nodeHeaders setMetricsApiMockFlag(true) window.SCP = { readOnlyTokenObject: { @@ -39,6 +45,9 @@ describe('creation of study files', () => { afterEach(() => { // Restores all mocks back to their original value jest.restoreAllMocks() + }) + + afterAll(() => { jest.setTimeout(5000) }) @@ -209,11 +218,22 @@ async function testMetadataUpload({ createFileSpy }) { const goodFileName = 'metadata-good.txt' + const goodContent = [ + 'NAME,disease,disease__ontology_label,species,species__ontology_label,library_preparation_protocol,'+ + 'library_preparation_protocol__ontology_label,organ,organ__ontology_label,sex,ethnicity__ontology_label,' + + 'ethnicity,donor_id,biosample_id', + 'TYPE,group,group,group,group,group,group,group,group,group,group,group,group,group', + 'CELL_0001,MONDO_0000001,disease or disorder,NCBITaxon_9606,Homo sapiens,EFO_0008919,' + + 'Seq-Well,UBERON_0001913,milk,female,European ancestry,HANCESTRO_0005,BM01,BM01_16dpp_r3' + ] fireFileSelectionEvent(screen.getByTestId('file-input'), { fileName: goodFileName, - content: 
'NAME,cell_type,cell_type__ontology_label,organism_age,disease,disease__ontology_label,species,species__ontology_label,geographical_region,geographical_region__ontology_label,library_preparation_protocol,library_preparation_protocol__ontology_label,organ,organ__ontology_label,sex,is_living,organism_age__unit,organism_age__unit_label,ethnicity__ontology_label,ethnicity,race,race__ontology_label,sample_type,donor_id,biosample_id,biosample_type,preservation_method\nTYPE,group,group,numeric,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group,group' + content: goodContent.join('\n') }) - await waitForElementToBeRemoved(() => screen.getByTestId('file-validation-spinner')) + await waitForElementToBeRemoved( + () => screen.getByTestId('file-validation-spinner'), + { timeout: 20000 } + ) expect(screen.getByTestId('file-selection-name')).toHaveTextContent(goodFileName) expect(saveButton()).not.toBeDisabled() @@ -221,9 +241,9 @@ async function testMetadataUpload({ createFileSpy }) { await waitForElementToBeRemoved(() => screen.getByTestId('file-save-spinner')) expect(createFileSpy).toHaveBeenLastCalledWith(expect.objectContaining({ - chunkEnd: 627, + chunkEnd: 487, chunkStart: 0, - fileSize: 627, + fileSize: 487, isChunked: false, studyAccession: 'SCP1', studyFileData: formData @@ -439,5 +459,3 @@ async function testSequenceFileUpload({ createFileSpy }) { })) expect(screen.getByTestId('sequence-status-badge')).toHaveClass('complete') } - -COORDINATE_LABEL_FILE diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index 65869e16c..4d270c19d 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -443,7 +443,8 @@ class IngestJobTest < ActiveSupport::TestCase ) job_mock = Minitest::Mock.new 2.times { job_mock.expect :object, cluster_job } - dummy_job = Google::Apis::BatchV1::Job.new(status: Google::Apis::BatchV1::JobStatus.new(state: 'RUNNING')) + dummy_job = Google::Apis::BatchV1::Job.new(name: pipeline_name, + status: Google::Apis::BatchV1::JobStatus.new(state: 'RUNNING')) pipeline_mock = Minitest::Mock.new pipeline_mock.expect :get_job, dummy_job, [pipeline_name] @@ -452,11 +453,15 @@ class IngestJobTest < ActiveSupport::TestCase DelayedJobAccessor.stub :find_jobs_by_handler_type, [Delayed::Job.new] do DelayedJobAccessor.stub :dump_job_handler, job_mock do ApplicationController.stub :batch_api_client, pipeline_mock do - metadata_job.report_anndata_summary - job_mock.verify - pipeline_mock.verify - ann_data_file.reload - assert_not ann_data_file.has_anndata_summary? + metadata_job.stub :failed?, false do + metadata_job.stub :should_retry?, false do + metadata_job.report_anndata_summary + job_mock.verify + pipeline_mock.verify + ann_data_file.reload + assert_not ann_data_file.has_anndata_summary? + end + end end end end @@ -481,10 +486,95 @@ class IngestJobTest < ActiveSupport::TestCase DelayedJobAccessor.stub :find_jobs_by_handler_type, [] do MetricsService.stub :log, metrics_mock do cluster_job.stub :anndata_summary_props, mock_job_props do - cluster_job.report_anndata_summary - ann_data_file.reload - assert ann_data_file.has_anndata_summary? - metrics_mock.verify + cluster_job.stub :should_retry?, false do + cluster_job.stub :failed?, false do + cluster_job.report_anndata_summary + ann_data_file.reload + assert ann_data_file.has_anndata_summary? 
+ metrics_mock.verify + end + end + end + end + end + end + + test 'should determine when to skip AnnData summary' do + study = FactoryBot.create( + :detached_study, name_prefix: 'ingestSummary skip test', user: @user, test_array: @@studies_to_clean + ) + ann_data_file = FactoryBot.create(:ann_data_file, name: 'matrix.h5ad', study:) + ann_data_file.ann_data_file_info.reference_file = false + ann_data_file.options[:anndata_summary] = false + ann_data_file.save + pipeline_name = SecureRandom.uuid + job = IngestJob.new( + pipeline_name:, study:, user: @user, study_file: ann_data_file, action: :ingest_anndata + ) + + job.stub :failed?, true do + job.stub :should_retry?, false do + assert_not job.skip_anndata_summary? + end + end + + job.stub :failed?, false do + job.stub :should_retry?, false do + assert job.skip_anndata_summary? + job.action = :differential_expression + assert job.skip_anndata_summary? + ann_data_file.options[:anndata_summary] = true + job.action = :ingest_cluster + assert job.skip_anndata_summary? + end + end + end + + test 'should report AnnData summary on extraction failure' do + study = FactoryBot.create( + :detached_study, name_prefix: 'ingestSummary report', user: @user, test_array: @@studies_to_clean + ) + ann_data_file = FactoryBot.create(:ann_data_file, name: 'matrix.h5ad', study:) + ann_data_file.ann_data_file_info.reference_file = false + ann_data_file.upload_file_size = 1.megabyte + ann_data_file.options[:anndata_summary] = false + ann_data_file.save + params = AnnDataIngestParameters.new( + anndata_file: ann_data_file.gs_url, file_size: ann_data_file.upload_file_size, + extract_raw_counts: true, raw_location: '.raw' + ) + pipeline_name = SecureRandom.uuid + job = IngestJob.new( + pipeline_name:, study:, user: @user, study_file: ann_data_file, action: :ingest_anndata + ) + mock_job_props = { + perfTime: 60000, + fileName: ann_data_file.name, + fileType: 'AnnData', + fileSize: ann_data_file.upload_file_size, + studyAccession: study.accession, + trigger: ann_data_file.upload_trigger, + jobStatus: 'failed', + numFilesExtracted: 0, + machineType: params.machine_type, + action: :ingest_pipeline, + ingest_action: :ingest_anndata, + exitCode: 1 + } + metrics_mock = Minitest::Mock.new + metrics_mock.expect :call, true, ['ingestSummary', mock_job_props, @user] + + DelayedJobAccessor.stub :find_jobs_by_handler_type, [] do + MetricsService.stub :log, metrics_mock do + job.stub :anndata_summary_props, mock_job_props do + job.stub :failed?, true do + job.stub :should_retry?, false do + job.report_anndata_summary + ann_data_file.reload + assert ann_data_file.has_anndata_summary? 
+ metrics_mock.verify + end + end end end end @@ -979,7 +1069,7 @@ class IngestJobTest < ActiveSupport::TestCase # must mock batch_api_client getting pipeline metadata client_mock = Minitest::Mock.new - 4.times { client_mock.expect :exit_code_from_task, 137, [pipeline_name] } + 5.times { client_mock.expect :exit_code_from_task, 137, [pipeline_name] } client_mock.expect :get_job_resources, vm_info, [], job: dummy_job client_mock.expect :get_job_command_line, commands, [], job: dummy_job # new pipeline mock is resubmitted job with larger machine_type diff --git a/test/test_data/validation/metadata_bad_v3-0-0.tsv b/test/test_data/validation/metadata_bad_v3-0-0.tsv new file mode 100644 index 000000000..ffe3f1536 --- /dev/null +++ b/test/test_data/validation/metadata_bad_v3-0-0.tsv @@ -0,0 +1,32 @@ +NAME cell_type cell_type__ontology_label organism_age disease disease__ontology_label species species__ontology_label library_preparation_protocol library_preparation_protocol__ontology_label organ organ__ontology_label sex is_living organism_age__unit organism_age__unit_label ethnicity__ontology_label ethnicity sample_type donor_id biosample_id biosample_type preservation_method +TYPE group group numeric group group group group group group group group group group group group group group group group group group group +CELL_0001 CL_0000066 epithelial cell 1 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0002 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0003 CL_0000066 epithelial cell 12 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0004 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0005 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0006 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0007 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0008 CL_0000066 epithelial cell 61 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 
PrimaryBioSample_BodyFluid Fresh +CELL_0009 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00010 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00011 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00012 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00013 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00014 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00015 CL_0000066 epithelial cell 58 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0001t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0002t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0003t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0004t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0005t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0006t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well 
UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0007t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0008t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0009t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year invalid label HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00010t CL_0000066 epithelial cell 103 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year invalid label HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00011t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year invalid label HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00012t CL_0000066 not the label 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00013t CL_0000066 not the label 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00014t CL_0000066 epithelial cell 99 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00015t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh diff --git a/test/test_data/validation/metadata_good_v3-0-0.tsv b/test/test_data/validation/metadata_good_v3-0-0.tsv new file mode 100644 index 000000000..47ed307e9 --- /dev/null +++ b/test/test_data/validation/metadata_good_v3-0-0.tsv @@ -0,0 +1,32 @@ +NAME cell_type cell_type__ontology_label organism_age disease disease__ontology_label species species__ontology_label library_preparation_protocol library_preparation_protocol__ontology_label organ organ__ontology_label sex is_living organism_age__unit organism_age__unit_label ethnicity__ontology_label ethnicity sample_type donor_id biosample_id biosample_type preservation_method +TYPE group group numeric group group group group group group group group group group group group group group group group group group group +CELL_0001 CL_0000066 epithelial cell 1 MONDO_0000001 disease or disorder 
NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0002 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0003 CL_0000066 epithelial cell 12 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0004 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0005 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0006 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0007 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0008 CL_0000066 epithelial cell 61 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0009 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00010 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00011 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00012 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00013 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 
BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00014 CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00015 CL_0000066 epithelial cell 58 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0001t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0002t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0003t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0004t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0005t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0006t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0007t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0008t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_0009t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00010t CL_0000066 epithelial cell 103 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00011t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens 
EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00012t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00013t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00014t CL_0000066 epithelial cell 99 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh +CELL_00015t CL_0000066 epithelial cell 31 MONDO_0000001 disease or disorder NCBITaxon_9606 Homo sapiens EFO_0008919 Seq-Well UBERON_0001913 milk female yes UO_0000036 year European ancestry HANCESTRO_0005 direct from donor - fresh BM01 BM01_16dpp_r3 PrimaryBioSample_BodyFluid Fresh diff --git a/yarn.lock b/yarn.lock index d24e2d593..3ed99cc23 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9471,10 +9471,10 @@ vite-plugin-ruby@^3.0.5: debug "^4.3.4" fast-glob "^3.2.12" -vite@^4.5.10: - version "4.5.10" - resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.10.tgz#fc6823d5347fd171a25ae6379f05b0be700d303e" - integrity sha512-f2ueoukYTMI/5kMMT7wW+ol3zL6z6PjN28zYrGKAjnbzXhRXWXPThD3uN6muCp+TbfXaDgGvRuPsg6mwVLaWwQ== +vite@^4.5.13: + version "4.5.13" + resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.13.tgz#778534a947112c6c455e89737730fae5d458a294" + integrity sha512-Hgp8IF/yZDzKsN1hQWOuQZbrKiaFsbQud+07jJ8h9m9PaHWkpvZ5u55Xw5yYjWRXwRQ4jwFlJvY7T7FUJG9MCA== dependencies: esbuild "^0.18.10" postcss "^8.4.27"