Skip to content

Commit d6a5111

Browse files
committed
Resolves #165, added documentation
1 parent 73ce220 commit d6a5111

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

scripts/format_analysisMetadata.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,24 @@
1+
"""Create an analysis metadata TSV file and gather key outputs for a
2+
Tourmaline run.
3+
4+
Reads Tourmaline step configs (qaqc, repseqs, taxonomy) plus a
5+
Tourmaline metadata YAML, then writes {analysis_run_name}_analysisMetadata.tsv
6+
and copies selected artifacts (taxonomy asv_taxa_features.tsv, repseqs
7+
table.tsv) into one output directory.
8+
9+
Usage (common):
10+
python tourmaline/scripts/format_analysisMetadata.py \
11+
-w WORKDIR -q QAQC_RUN -r REPSEQS_RUN -t TAX_RUN \
12+
-p PROJECT_ID -O OUT_DIR [-a ASSAY] [-A ANALYSIS] [-T META_YAML]
13+
"""
14+
115
import argparse
216
import yaml
317
import pandas as pd
418
import os
519
import glob
620
import shutil
21+
from datetime import datetime
722

823
## TODO: add a check for repeated run names, and a place to add project_id, assay_name, and user-provided terms
924

@@ -137,12 +152,12 @@ def assign_collapse(taxa):
137152
def main():
138153
parser = argparse.ArgumentParser(description="Generate a single TSV file from multiple YAML files.")
139154
parser.add_argument('-w','--working_dir', required=True, help='Working directory containing step folders')
140-
parser.add_argument('-s','--qaqc_run_name', required=True, help='Run name for qaqc step')
155+
parser.add_argument('-q','--qaqc_run_name', required=True, help='Run name for qaqc step')
141156
parser.add_argument('-r','--repseqs_run_name', required=True, help='Run name for repseqs step')
142157
parser.add_argument('-t','--taxonomy_run_name', required=True, help='Run name for taxonomy step')
143158
parser.add_argument('-p','--project_id', required=True, help='Value for project_id')
144-
parser.add_argument('-a','--assay_name', help='Value for assay_name, otherwise use value in qaqc config')
145-
parser.add_argument('-A','--analysis_run_name', help='Value for analysis_run_name, otherwise use taxonomy run name')
159+
parser.add_argument('-a','--assay_name', help='Value for assay_name, otherwise uses value in qaqc config')
160+
parser.add_argument('-A','--analysis_run_name', help='Value for analysis_run_name, otherwise uses taxonomy run name')
146161
parser.add_argument('-T','--tourmaline_metadata',default="./00-data/tourmaline_metadata.yaml", help='Path to tourmaline metadata')
147162
parser.add_argument('-O','--output_folder', required=True, help='Output folder path where files will be saved')
148163
#parser.add_argument('--checklist', required=True, help='Path to the CSV file with metadata terms')
@@ -181,15 +196,17 @@ def main():
181196
taxa3 = load_yaml(taxonomy_config_path)
182197
tour = load_yaml(args.tourmaline_metadata)
183198
project_id = args.project_id
184-
assay_name = args.assay_name if args.assay_name else qaqc1['amplicon_name']
199+
assay_name = args.assay_name if args.assay_name else qaqc1['assay_name']
185200
analysis_run_name = args.analysis_run_name if args.analysis_run_name else args.taxonomy_run_name
201+
analysis_run_date = datetime.now().strftime("%Y-%m-%d")
186202

187203
# MAPPINGS
188204
mappings = {
189205
# FAIR eDNA TERMS
190206
'project_id': project_id,
191207
'assay_name': assay_name,
192208
'analysis_run_name': analysis_run_name,
209+
'analysis_run_date': analysis_run_date,
193210
"sop_bioinformatics": tour['sop_bioinformatics'],
194211
"trim_method": trim_paramF(qaqc1,repseqs2,tour)[0],
195212
"trim_param": trim_paramF(qaqc1,repseqs2,tour)[1],
@@ -266,7 +283,7 @@ def main():
266283
# Save the combined data to the output TSV file
267284
try:
268285
# Generate output filename with analysis_run_name prefix
269-
metadata_filename = f"{analysis_run_name}_metadata.tsv"
286+
metadata_filename = f"{analysis_run_name}_analysisMetadata.tsv"
270287
metadata_path = os.path.join(args.output_folder, metadata_filename)
271288
dict_to_tsv(mappings, metadata_path)
272289
print(f"Successfully generated metadata file: {metadata_path}")

0 commit comments

Comments
 (0)