|
| 1 | +r"""Create an analysis metadata TSV file and gather key outputs for a |
| 2 | +Tourmaline run. |
| 3 | +
|
| 4 | +Reads Tourmaline step configs (qaqc, repseqs, taxonomy) plus a |
| 5 | +Tourmaline metadata YAML, then writes {analysis_run_name}_analysisMetadata.tsv |
| 6 | +and copies selected artifacts (asv_taxa_features.tsv from the taxonomy step |
| 7 | +and table.tsv from the repseqs step) into one output directory. |
| 8 | +
|
| 9 | +Usage (common): |
| 10 | + python tourmaline/scripts/format_analysisMetadata.py \ |
| 11 | + -w WORKDIR -q QAQC_RUN -r REPSEQS_RUN -t TAX_RUN \ |
| 12 | + -p PROJECT_ID -O OUT_DIR [-a ASSAY] [-A ANALYSIS] [-T META_YAML] |
| 13 | +""" |
| 14 | + |
1 | 15 | import argparse |
2 | 16 | import yaml |
3 | 17 | import pandas as pd |
4 | 18 | import os |
5 | 19 | import glob |
6 | 20 | import shutil |
| 21 | +from datetime import datetime |
7 | 22 |
|
8 | 23 | ## TODO: add a check for repeated run names, and a place to add project_id, assay_name, and user-provided terms |
9 | 24 |
|
@@ -137,12 +152,12 @@ def assign_collapse(taxa): |
137 | 152 | def main(): |
138 | 153 | parser = argparse.ArgumentParser(description="Generate a single TSV file from multiple YAML files.") |
139 | 154 | parser.add_argument('-w','--working_dir', required=True, help='Working directory containing step folders') |
140 | | - parser.add_argument('-s','--qaqc_run_name', required=True, help='Run name for qaqc step') |
| 155 | + parser.add_argument('-q','--qaqc_run_name', required=True, help='Run name for qaqc step') |
141 | 156 | parser.add_argument('-r','--repseqs_run_name', required=True, help='Run name for repseqs step') |
142 | 157 | parser.add_argument('-t','--taxonomy_run_name', required=True, help='Run name for taxonomy step') |
143 | 158 | parser.add_argument('-p','--project_id', required=True, help='Value for project_id') |
144 | | - parser.add_argument('-a','--assay_name', help='Value for assay_name, otherwise use value in qaqc config') |
145 | | - parser.add_argument('-A','--analysis_run_name', help='Value for analysis_run_name, otherwise use taxonomy run name') |
| 159 | + parser.add_argument('-a','--assay_name', help='Value for assay_name, otherwise uses value in qaqc config') |
| 160 | + parser.add_argument('-A','--analysis_run_name', help='Value for analysis_run_name, otherwise uses taxonomy run name') |
146 | 161 | parser.add_argument('-T','--tourmaline_metadata',default="./00-data/tourmaline_metadata.yaml", help='Path to tourmaline metadata') |
147 | 162 | parser.add_argument('-O','--output_folder', required=True, help='Output folder path where files will be saved') |
148 | 163 | #parser.add_argument('--checklist', required=True, help='Path to the CSV file with metadata terms') |
@@ -181,15 +196,17 @@ def main(): |
181 | 196 | taxa3 = load_yaml(taxonomy_config_path) |
182 | 197 | tour = load_yaml(args.tourmaline_metadata) |
183 | 198 | project_id = args.project_id |
184 | | - assay_name = args.assay_name if args.assay_name else qaqc1['amplicon_name'] |
| 199 | + assay_name = args.assay_name if args.assay_name else qaqc1['assay_name'] |
185 | 200 | analysis_run_name = args.analysis_run_name if args.analysis_run_name else args.taxonomy_run_name |
| 201 | + analysis_run_date = datetime.now().strftime("%Y-%m-%d") |
186 | 202 |
|
187 | 203 | # MAPPINGS |
188 | 204 | mappings = { |
189 | 205 | # FAIR eDNA TERMS |
190 | 206 | 'project_id': project_id, |
191 | 207 | 'assay_name': assay_name, |
192 | 208 | 'analysis_run_name': analysis_run_name, |
| 209 | + 'analysis_run_date': analysis_run_date, |
193 | 210 | "sop_bioinformatics": tour['sop_bioinformatics'], |
194 | 211 | "trim_method": trim_paramF(qaqc1,repseqs2,tour)[0], |
195 | 212 | "trim_param": trim_paramF(qaqc1,repseqs2,tour)[1], |
@@ -266,7 +283,7 @@ def main(): |
266 | 283 | # Save the combined data to the output TSV file |
267 | 284 | try: |
268 | 285 | # Generate output filename with analysis_run_name prefix |
269 | | - metadata_filename = f"{analysis_run_name}_metadata.tsv" |
| 286 | + metadata_filename = f"{analysis_run_name}_analysisMetadata.tsv" |
270 | 287 | metadata_path = os.path.join(args.output_folder, metadata_filename) |
271 | 288 | dict_to_tsv(mappings, metadata_path) |
272 | 289 | print(f"Successfully generated metadata file: {metadata_path}") |
|
0 commit comments