1- -- version 1.0.2 of derived table schema and data definition
1+ -- version 1.0.3 of derived table schema and data definition
22-- when making updates:
33-- increment the version number here
44-- update pom.xml with the new version number
@@ -11,6 +11,7 @@ DROP TABLE IF EXISTS clinical_data_derived;
1111DROP TABLE IF EXISTS clinical_event_derived;
1212DROP TABLE IF EXISTS genetic_alteration_derived;
1313DROP TABLE IF EXISTS generic_assay_data_derived;
14+ DROP TABLE IF EXISTS mutation_derived;
1415
1516-- the following query "fixes" the sample_profile table by adding entries for "missing" samples -- those which appear in mutated case list but not in the MySQL sample_profile table
1617-- this problem was handled in java at run time in legacy codebase
@@ -172,8 +173,9 @@ CREATE TABLE IF NOT EXISTS genomic_event_derived
172173 sv_event_info String,
173174 patient_unique_id String,
174175 off_panel Boolean DEFAULT FALSE
175- ) ENGINE = MergeTree
176- ORDER BY (variant_type, entrez_gene_id, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id);
176+ ) ENGINE = MergeTree
177+ ORDER BY (genetic_profile_stable_id, cancer_study_identifier, variant_type, entrez_gene_id, hugo_gene_symbol, sample_unique_id);
178+
177179
178180INSERT INTO genomic_event_derived
179181-- Insert Mutations
@@ -373,10 +375,11 @@ SELECT
373375 ifNull(ce .stop_date , 0 ) AS stop_date,
374376 ce .event_type AS event_type,
375377 cs .cancer_study_identifier
376- FROM clinical_event ce
377- LEFT JOIN clinical_event_data ced ON ce .clinical_event_id = ced .clinical_event_id
378- INNER JOIN patient p ON ce .patient_id = p .internal_id
379- INNER JOIN cancer_study cs ON p .cancer_study_id = cs .cancer_study_id ;
378+
379+ FROM clinical_event_data ced
380+ RIGHT JOIN clinical_event ce ON ced .clinical_event_id = ce .clinical_event_id
381+ INNER JOIN patient p ON ce .patient_id = p .internal_id
382+ INNER JOIN cancer_study cs ON p .cancer_study_id = cs .cancer_study_id ;
380383
381384CREATE TABLE IF NOT EXISTS genetic_alteration_derived
382385(
@@ -485,6 +488,211 @@ FROM
485488 JOIN cancer_study cs ON cs .cancer_study_id = subquery .cancer_study_id
486489 JOIN sample_derived sd ON sd .internal_id = subquery .sample_id ;
487490
491+
492+
493+ DROP TABLE IF EXISTS mutation_derived;
494+ CREATE TABLE mutation_derived
495+ (
496+ molecularProfileId String COMMENT ' Stable ID of the genetic profile' ,
497+ sampleId String COMMENT ' Stable ID of the sample' ,
498+ sampleInternalId Int64,
499+ patientId String COMMENT ' Stable ID of the patient' ,
500+ entrezGeneId Int64 COMMENT ' Entrez Gene ID from mutation table (NOT NULL)' ,
501+ studyId String COMMENT ' Cancer study identifier' ,
502+ center Nullable(String) COMMENT ' Sequencing center' ,
503+ mutationStatus Nullable(String) COMMENT ' Mutation status (e.g., Somatic, Germline)' ,
504+ validationStatus Nullable(String) COMMENT ' Validation status' ,
505+ tumorAltCount Nullable(Int64) COMMENT ' Tumor alternate allele count' ,
506+ tumorRefCount Nullable(Int64) COMMENT ' Tumor reference allele count' ,
507+ normalAltCount Nullable(Int64) COMMENT ' Normal alternate allele count' ,
508+ normalRefCount Nullable(Int64) COMMENT ' Normal reference allele count' ,
509+ aminoAcidChange Nullable(String) COMMENT ' Amino acid change' ,
510+ chr Nullable(String) COMMENT ' Chromosome' ,
511+ startPosition Nullable(Int64) COMMENT ' Start position' ,
512+ endPosition Nullable(Int64) COMMENT ' End position' ,
513+ referenceAllele Nullable(String) COMMENT ' Reference allele' ,
514+ tumorSeqAllele Nullable(String) COMMENT ' Tumor sequence allele' ,
515+ proteinChange Nullable(String) COMMENT ' Protein change' ,
516+ mutationType Nullable(String) COMMENT ' Type of mutation' ,
517+ ncbiBuild Nullable(String) COMMENT ' NCBI build version' ,
518+ variantType Nullable(String) COMMENT ' Variant type' ,
519+ refseqMrnaId Nullable(String) COMMENT ' RefSeq mRNA ID' ,
520+ proteinPosStart Nullable(Int64) COMMENT ' Protein position start' ,
521+ proteinPosEnd Nullable(Int64) COMMENT ' Protein position end' ,
522+ keyword Nullable(String) COMMENT ' Keyword' ,
523+ annotationJSON Nullable(String) COMMENT ' Annotation JSON' ,
524+ driverFilter Nullable(String) COMMENT ' Driver filter' ,
525+ driverFilterAnnotation Nullable(String) COMMENT ' Driver filter annotation' ,
526+ driverTiersFilter Nullable(String) COMMENT ' Driver tiers filter' ,
527+ driverTiersFilterAnnotation Nullable(String) COMMENT ' Driver tiers filter annotation' ,
528+ ` GENE.entrezGeneId` Nullable(Int64) COMMENT ' Gene entrez ID' ,
529+ ` GENE.hugoGeneSymbol` Nullable(String) COMMENT ' HUGO gene symbol' ,
530+ ` GENE.type` Nullable(String) COMMENT ' Gene type' ,
531+ ` alleleSpecificCopyNumber.ascnIntegerCopyNumber` Nullable(Int64) COMMENT ' ASCN integer copy number' ,
532+ ` alleleSpecificCopyNumber.ascnMethod` Nullable(String) COMMENT ' ASCN method' ,
533+ ` alleleSpecificCopyNumber.ccfExpectedCopiesUpper` Nullable(Float64) COMMENT ' CCF expected copies upper bound' ,
534+ ` alleleSpecificCopyNumber.ccfExpectedCopies` Nullable(Float64) COMMENT ' CCF expected copies' ,
535+ ` alleleSpecificCopyNumber.clonal` Nullable(String) COMMENT ' Clonality annotation' ,
536+ ` alleleSpecificCopyNumber.minorCopyNumber` Nullable(Int64) COMMENT ' Minor copy number' ,
537+ ` alleleSpecificCopyNumber.expectedAltCopies` Nullable(Int64) COMMENT ' Expected alternate copies' ,
538+ ` alleleSpecificCopyNumber.totalCopyNumber` Nullable(Int64) COMMENT ' Total copy number'
539+ )
540+ ENGINE = MergeTree()
541+ ORDER BY (molecularProfileId, sampleId, entrezGeneId)
542+ COMMENT ' Mutation query results with detailed annotations including driver status and allele-specific copy numbers' ;
543+
544+ INSERT INTO mutation_derived
545+ SELECT
546+ genetic_profile .stable_id AS molecularProfileId,
547+ sample .stable_id AS sampleId,
548+ sample .internal_id As sampleInternalId,
549+ patient .stable_id AS patientId,
550+ mutation .entrez_gene_id AS entrezGeneId,
551+ cancer_study .cancer_study_identifier AS studyId,
552+ mutation .center AS center,
553+ mutation .mutation_status AS mutationStatus,
554+ mutation .validation_status AS validationStatus,
555+ mutation .tumor_alt_count AS tumorAltCount,
556+ mutation .tumor_ref_count AS tumorRefCount,
557+ mutation .normal_alt_count AS normalAltCount,
558+ mutation .normal_ref_count AS normalRefCount,
559+ mutation .amino_acid_change AS aminoAcidChange,
560+ mutation_event .chr AS chr,
561+ mutation_event .start_position AS startPosition,
562+ mutation_event .end_position AS endPosition,
563+ mutation_event .reference_allele AS referenceAllele,
564+ mutation_event .tumor_seq_allele AS tumorSeqAllele,
565+ mutation_event .protein_change AS proteinChange,
566+ mutation_event .mutation_type AS mutationType,
567+ mutation_event .ncbi_build AS ncbiBuild,
568+ mutation_event .variant_type AS variantType,
569+ mutation_event .refseq_mrna_id AS refseqMrnaId,
570+ mutation_event .protein_pos_start AS proteinPosStart,
571+ mutation_event .protein_pos_end AS proteinPosEnd,
572+ mutation_event .keyword AS keyword,
573+ mutation .annotation_json AS annotationJSON,
574+ alteration_driver_annotation .driver_filter AS driverFilter,
575+ alteration_driver_annotation .driver_filter_annotation AS driverFilterAnnotation,
576+ alteration_driver_annotation .driver_tiers_filter AS driverTiersFilter,
577+ alteration_driver_annotation .driver_tiers_filter_annotation AS driverTiersFilterAnnotation,
578+ gene .entrez_gene_id AS ` GENE.entrezGeneId` ,
579+ gene .hugo_gene_symbol AS ` GENE.hugoGeneSymbol` ,
580+ gene .type AS ` GENE.type` ,
581+ allele_specific_copy_number .ascn_integer_copy_number AS ` alleleSpecificCopyNumber.ascnIntegerCopyNumber` ,
582+ allele_specific_copy_number .ascn_method AS ` alleleSpecificCopyNumber.ascnMethod` ,
583+ allele_specific_copy_number .ccf_expected_copies_upper AS ` alleleSpecificCopyNumber.ccfExpectedCopiesUpper` ,
584+ allele_specific_copy_number .ccf_expected_copies AS ` alleleSpecificCopyNumber.ccfExpectedCopies` ,
585+ allele_specific_copy_number .clonal AS ` alleleSpecificCopyNumber.clonal` ,
586+ allele_specific_copy_number .minor_copy_number AS ` alleleSpecificCopyNumber.minorCopyNumber` ,
587+ allele_specific_copy_number .expected_alt_copies AS ` alleleSpecificCopyNumber.expectedAltCopies` ,
588+ allele_specific_copy_number .total_copy_number AS ` alleleSpecificCopyNumber.totalCopyNumber`
589+ FROM mutation
590+ INNER JOIN genetic_profile ON mutation .genetic_profile_id = genetic_profile .genetic_profile_id
591+ INNER JOIN sample ON mutation .sample_id = sample .internal_id
592+ INNER JOIN patient ON sample .patient_id = patient .internal_id
593+ INNER JOIN cancer_study ON patient .cancer_study_id = cancer_study .cancer_study_id
594+ LEFT JOIN alteration_driver_annotation ON (mutation .genetic_profile_id = alteration_driver_annotation .genetic_profile_id ) AND (mutation .sample_id = alteration_driver_annotation .sample_id ) AND (mutation .mutation_event_id = alteration_driver_annotation .alteration_event_id )
595+ INNER JOIN mutation_event ON mutation .mutation_event_id = mutation_event .mutation_event_id
596+ INNER JOIN gene ON mutation .entrez_gene_id = gene .entrez_gene_id
597+ LEFT JOIN allele_specific_copy_number ON (mutation .mutation_event_id = allele_specific_copy_number .mutation_event_id ) AND (mutation .genetic_profile_id = allele_specific_copy_number .genetic_profile_id ) AND (mutation .sample_id = allele_specific_copy_number .sample_id );
598+
599+
600+
601+
602+ -- START: PRIMARY KEY ADDITIONS
603+ -- THE FOLLOWING SCRIPTS EXIST TO ADD PRIMARY KEYS TO LEGACY TABLES THAT ARE MISSING THEM. YOU
604+ -- CANNOT CHANGE THE PRIMARY KEY ON A TABLE IN CLICKHOUSE, SO WE NEED TO CREATE A NEW TABLE WITH THE
605+ -- PRIMARY KEY AND THEN COPY THE DATA OVER.
606+
607+
608+ -- Adds primary key to the sample_cna_event table for Clickhouse-only
609+ DROP TABLE IF EXISTS sample_cna_event_BACKUP;
610+ CREATE TABLE sample_cna_event_BACKUP
611+ (
612+ ` cna_event_id` Int64 COMMENT ' References cna_event.cna_event_id.' ,
613+ ` sample_id` Int64 COMMENT ' References sample.internal_id.' ,
614+ ` genetic_profile_id` Int64 COMMENT ' References genetic_profile.genetic_profile_id.' ,
615+ ` annotation_json` Nullable(String) COMMENT ' JSON-formatted annotation details.'
616+ )
617+ ENGINE = MergeTree()
618+ PRIMARY KEY (genetic_profile_id, cna_event_id, sample_id)
619+ ORDER BY (genetic_profile_id, cna_event_id, sample_id)
620+ SETTINGS index_granularity = 8192
621+ COMMENT ' Observed CNA events per sample and profile. References cna_event, sample, and genetic_profile.' ;
622+
623+ -- Copy the data
624+ INSERT INTO sample_cna_event_BACKUP
625+ SELECT * FROM sample_cna_event;
626+
627+ -- SWITCH THE TABLES
628+ EXCHANGE TABLES sample_cna_event_BACKUP AND sample_cna_event;
629+
630+ DROP TABLE IF EXISTS mutation_BACKUP;
631+ CREATE TABLE mutation_BACKUP
632+ (
633+ ` mutation_event_id` Int64 COMMENT ' References mutation_event.mutation_event_id.' ,
634+ ` genetic_profile_id` Int64 COMMENT ' References genetic_profile.genetic_profile_id.' ,
635+ ` sample_id` Int64 COMMENT ' References sample.internal_id.' ,
636+ ` entrez_gene_id` Int64 COMMENT ' References gene.entrez_gene_id.' ,
637+ ` center` Nullable(String) COMMENT ' Center where sequencing was performed.' ,
638+ ` sequencer` Nullable(String) COMMENT ' Sequencing platform used.' ,
639+ ` mutation_status` Nullable(String) COMMENT ' Mutation status: Germline, Somatic, or LOH.' ,
640+ ` validation_status` Nullable(String) COMMENT ' Validation status.' ,
641+ ` tumor_seq_allele1` Nullable(String) COMMENT ' Tumor allele 1 sequence.' ,
642+ ` tumor_seq_allele2` Nullable(String) COMMENT ' Tumor allele 2 sequence.' ,
643+ ` matched_norm_sample_barcode` Nullable(String) COMMENT ' Matched normal sample barcode.' ,
644+ ` match_norm_seq_allele1` Nullable(String) COMMENT ' Matched normal allele 1 sequence.' ,
645+ ` match_norm_seq_allele2` Nullable(String) COMMENT ' Matched normal allele 2 sequence.' ,
646+ ` tumor_validation_allele1` Nullable(String) COMMENT ' Tumor validation allele 1 sequence.' ,
647+ ` tumor_validation_allele2` Nullable(String) COMMENT ' Tumor validation allele 2 sequence.' ,
648+ ` match_norm_validation_allele1` Nullable(String) COMMENT ' Matched normal validation allele 1.' ,
649+ ` match_norm_validation_allele2` Nullable(String) COMMENT ' Matched normal validation allele 2.' ,
650+ ` verification_status` Nullable(String) COMMENT ' Verification status.' ,
651+ ` sequencing_phase` Nullable(String) COMMENT ' Sequencing phase.' ,
652+ ` sequence_source` Nullable(String) COMMENT ' Source of sequencing data.' ,
653+ ` validation_method` Nullable(String) COMMENT ' Validation method used.' ,
654+ ` score` Nullable(String) COMMENT ' Score or quality metric.' ,
655+ ` bam_file` Nullable(String) COMMENT ' Associated BAM file.' ,
656+ ` tumor_alt_count` Nullable(Int64) COMMENT ' Tumor alternate allele count.' ,
657+ ` tumor_ref_count` Nullable(Int64) COMMENT ' Tumor reference allele count.' ,
658+ ` normal_alt_count` Nullable(Int64) COMMENT ' Normal alternate allele count.' ,
659+ ` normal_ref_count` Nullable(Int64) COMMENT ' Normal reference allele count.' ,
660+ ` amino_acid_change` Nullable(String) COMMENT ' Amino acid change from mutation.' ,
661+ ` annotation_json` Nullable(String) COMMENT ' JSON-formatted annotations.'
662+ )
663+ ENGINE = MergeTree()
664+ ORDER BY (genetic_profile_id,entrez_gene_id)
665+ COMMENT ' Mutation observations in specific samples and profiles. References mutation_event, gene, genetic_profile, and sample.' ;
666+
667+ -- copy data into new table
668+ INSERT INTO mutation_BACKUP
669+ SELECT * FROM mutation;
670+
671+ -- switch the tables
672+ EXCHANGE TABLES mutation_BACKUP AND mutation;
673+
674+
675+ -- Adds primary key genetic_alteration table for Clickhouse-only
676+ DROP TABLE IF EXISTS genetic_alteration_BACKUP;
677+ CREATE TABLE genetic_alteration_BACKUP
678+ (
679+ ` genetic_profile_id` Int64,
680+ ` genetic_entity_id` Int64,
681+ ` values` String
682+ )
683+ ENGINE = MergeTree()
684+ ORDER BY (genetic_profile_id, genetic_entity_id);
685+
686+ -- Copy the data
687+ INSERT INTO genetic_alteration_BACKUP
688+ SELECT * FROM genetic_alteration;
689+
690+ -- SWITCH THE TABLES
691+ EXCHANGE TABLES genetic_alteration_BACKUP AND genetic_alteration;
692+
693+ -- END: PRIMARY KEY ADDITIONS
694+
695+
488696OPTIMIZE TABLE sample_to_gene_panel_derived;
489697OPTIMIZE TABLE gene_panel_to_gene_derived;
490698OPTIMIZE TABLE sample_derived;
0 commit comments