Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/phold/results/topfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,25 @@ def calculate_topfunctions_results(
updated_cds_dict[record_id][cds_id].qualifiers["phrog"][0] = (
result_dict[record_id][cds_id]["phrog"]
)
updated_cds_dict[record_id][cds_id].qualifiers["product"][0] = (
result_dict[record_id][cds_id]["product"]
)
updated_cds_dict[record_id][cds_id].qualifiers["function"][
0
] = result_dict[record_id][cds_id]["function"]

# Handle missing product qualifier - see https://www.ncbi.nlm.nih.gov/nuccore/OY726582.1/ CAJ1523274.1
if "product" not in updated_cds_dict[record_id][cds_id].qualifiers:
updated_cds_dict[record_id][cds_id].qualifiers["product"] = [
result_dict[record_id][cds_id]["product"]
]
else:
updated_cds_dict[record_id][cds_id].qualifiers["product"][0] = (
result_dict[record_id][cds_id]["product"]
)
# Handle missing function qualifier
if "function" not in updated_cds_dict[record_id][cds_id].qualifiers:
updated_cds_dict[record_id][cds_id].qualifiers["function"] = [
result_dict[record_id][cds_id]["function"]
]
else:
updated_cds_dict[record_id][cds_id].qualifiers["function"][0] = (
result_dict[record_id][cds_id]["function"]
)

# pharokka has a phrog (or genbank doesn't exist)
else:
Expand Down
46 changes: 31 additions & 15 deletions src/phold/subcommands/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,23 +139,39 @@ def subcommand_compare(
cds_feature.qualifiers["ID"][0]
] = cds_feature

# not pharokka - must be from genbank (supported only)
# not pharokka - must be from ncbi genbank (supported only)
except:
try:
# add these extra fields to make it all play nice
cds_feature.qualifiers["ID"] = cds_feature.qualifiers[
"protein_id"
]
cds_feature.qualifiers["function"] = []
cds_feature.qualifiers["function"].append(
"unknown function"
)
cds_feature.qualifiers["phrog"] = []
cds_feature.qualifiers["phrog"].append("No_PHROG")

cds_dict[record_id][
cds_feature.qualifiers["ID"][0]
] = cds_feature
# some NCBI Genbank CDS are actually pseudos
# e.g. OM418625

# CDS 19638..19895
# /locus_tag="CPT_lambdaimm21_023"
# /pseudogene="unknown"
# /codon_start=1
# /transl_table=11
# /product="tail fiber protein stf"
if "pseudogene" in cds_feature.qualifiers:

#logger.warning(f"Skipping pseudogene: {cds_feature}")
continue
else:

# add these extra fields to make it all play nice
cds_feature.qualifiers["ID"] = cds_feature.qualifiers[
"protein_id"
]
cds_feature.qualifiers["function"] = []
cds_feature.qualifiers["function"].append(
"unknown function"
)
cds_feature.qualifiers["phrog"] = []
cds_feature.qualifiers["phrog"].append("No_PHROG")

cds_dict[record_id][
cds_feature.qualifiers["ID"][0]
] = cds_feature

except:
logger.error(
Expand All @@ -175,7 +191,7 @@ def subcommand_compare(

i = 1
for non_cds_feature in record.features:
if non_cds_feature.type != "CDS":
if non_cds_feature.type != "CDS" or (non_cds_feature.type == "CDS" and "pseudogene" in cds_feature.qualifiers): # captures the pseudos
try:
non_cds_dict[record_id][
non_cds_feature.qualifiers["ID"][0]
Expand Down
25 changes: 25 additions & 0 deletions src/phold/subcommands/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,31 @@ def subcommand_predict(
# due to the weird list issue when parsing from genbank file
if fasta_flag is False:

# cds_feature.qualifiers["translation"] = cds_feature.qualifiers[
# "translation"
# ][0]

# some NCBI Genbank CDS are actually pseudos
# e.g. OM418625

# CDS 19638..19895
# /locus_tag="CPT_lambdaimm21_023"
# /pseudogene="unknown"
# /codon_start=1
# /transl_table=11
# /product="tail fiber protein stf"

if (
"translation" not in cds_feature.qualifiers
or len(cds_feature.qualifiers["translation"]) == 0
):
logger.warning(
f"Skipping CDS without provided translation in input, likely a pseudogene"
)
logger.warning(f"CDS: {cds_feature}"
)
continue

cds_feature.qualifiers["translation"] = cds_feature.qualifiers[
"translation"
][0]
Expand Down
2 changes: 1 addition & 1 deletion src/phold/utils/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.5
1.2.6
Loading
Loading