Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ rule get_accessions_for_species:
Step 4 should ensure that this specific rule doesn't run, and that only the desired accessions are loaded.
"""
log: "get_accessions_for_species.log"
input:
exclusion_file=config['exclude_accessions']
params:
species=species_for_db(config['species']),
atlas_env_file=config['atlas_env_file']
Expand All @@ -220,6 +222,14 @@ rule get_accessions_for_species:
-v ON_ERROR_STOP=1 $dbConnection > {output.accessions}
psql -c "COPY (SELECT accession FROM experiment WHERE species LIKE '{params.species}%' AND type LIKE '%BASELINE%' ORDER BY load_date) TO STDOUT WITH NULL AS ''" \
-v ON_ERROR_STOP=1 $dbConnection > {output.baseline_accessions}

echo {input.exclusion_file}
# Remove every accession listed in the exclusion file from both accession lists.
#   -F  treat each exclusion entry as a literal string, not a regular expression
#   -x  only drop lines that match an entry in full, so excluding E-MTAB-51
#       does not also drop E-MTAB-513
#   -f  read all entries from the exclusion file and filter in a single pass
# Every line of the exclusion file is honoured, including a last line without a
# trailing newline. NOTE(review): a blank line in the exclusion file makes grep
# drop blank lines from the lists as well - confirm that this is acceptable.
for accession_list in {output.accessions} {output.baseline_accessions}; do
    # grep exits with 1 when no line survives the filter (all accessions were
    # excluded, or the list was empty). That is a valid result; any other
    # non-zero status is a real error and must abort the job.
    grep -vxFf {input.exclusion_file} "$accession_list" > "$accession_list.tmp" || [ $? -eq 1 ] || exit 2
    # The temporary file sits next to its target, so jobs writing different
    # outputs cannot overwrite each other's intermediate file.
    mv "$accession_list.tmp" "$accession_list"
done
"""

checkpoint divide_accessions_into_chunks:
Expand Down Expand Up @@ -357,6 +367,8 @@ rule update_experiment_designs:
export output_dir={params.output_dir}
export EXPERIMENT_FILES={params.experiment_files}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "


input_accessions={input.accessions}

Expand Down Expand Up @@ -444,6 +456,7 @@ rule update_coexpressions:
export output_dir={params.output_dir}
export EXPERIMENT_FILES={params.experiment_files}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "

input_accessions={input.baseline_accessions}

Expand Down Expand Up @@ -537,6 +550,7 @@ rule run_bioentities_JSONL_creation:
export EXPERIMENT_FILES={params.experiment_files}
export BIOENTITIES_JSONL_PATH={params.output_dir}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "

{micromamba_env}

Expand Down Expand Up @@ -567,6 +581,7 @@ rule delete_species_bioentities_index:
exec &> "{log}"
source {params.atlas_env_file}
export SPECIES={params.species}
export PS1="\\u@\\h:\\w\\$ "

{micromamba_env}

Expand Down Expand Up @@ -608,6 +623,7 @@ rule load_species_into_bioentities_index:
export BIOENTITIES_JSONL_PATH={params.output_dir}
export SPECIES={params.species}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "

{micromamba_env}

Expand Down Expand Up @@ -663,6 +679,7 @@ rule analytics_bioentities_mapping:
export output_dir={params.output_dir}
export SPECIES={params.species}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "

# needed to trigger an error code exit for mappings
export failed_accessions_output=$prefix"/failed_accessions.txt"
Expand Down Expand Up @@ -719,6 +736,8 @@ rule create_analytics_jsonl_files:
export SPECIES={params.species}
export server_port=8081 #fake
export BIN_MAP={params.mappings_directory}
export PS1="\\u@\\h:\\w\\$ "


input_accessions={input.accessions}

Expand Down Expand Up @@ -784,6 +803,8 @@ rule load_bulk_analytics_index:
export EXPERIMENT_FILES={params.experiment_files}
export SPECIES={params.species}
export server_port=8081 #fake
export PS1="\\u@\\h:\\w\\$ "


input_accessions={input.accessions}

Expand Down
6 changes: 3 additions & 3 deletions log_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", -1)
pd.set_option("display.max_colwidth", None)


def format_percentage(done, total):
Expand Down Expand Up @@ -105,8 +105,8 @@ def log_handler(msg):
# l.info(f"{i} ")
if len(i) == 1:
# update row
df["Error_occurrence"].iloc[i] += 1
df["Error_out"].iloc[i] = error_out
df.loc[i, "Error_occurrence"] += 1
df.loc[i, "Error_out"] = error_out
else:
# add new row
temp_df = pd.DataFrame(
Expand Down