Skip to content

Commit 31baa9b

Browse files
committed
PR suggestions
1 parent fa9fe7a commit 31baa9b

File tree

10 files changed: 86 additions and 130 deletions.

variant/tokenizers/amino_acid_deletion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(self, amino_acid_cache: AminoAcidCache) -> None:
1717
:param AminoAcidCache amino_acid_cache: Valid amino acid codes.
1818
"""
1919
self.amino_acid_cache = amino_acid_cache
20-
self.splitter = re.compile(r'del')
20+
self.splitter = re.compile('del')
2121
self.splitter_char_digit = re.compile("([a-zA-Z]+)([0-9]+)")
2222
self.parts = None
2323

variant/validators/amino_acid_deletion.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ def validate(self, classification: Classification) \
5454
errors.append(f"Non matching tokens found for "
5555
f"{self.variant_name()}.")
5656

57-
if len(gene_tokens) == 0:
58-
errors.append(f'No gene tokens for a {self.variant_name()}.')
57+
len_gene_tokens = len(gene_tokens)
5958

60-
if len(gene_tokens) > 1:
59+
if len_gene_tokens == 0:
60+
errors.append(f'No gene tokens for a {self.variant_name()}.')
61+
elif len_gene_tokens > 1:
6162
errors.append('More than one gene symbol found for a single'
6263
f' {self.variant_name()}')
6364

@@ -106,15 +107,16 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
106107
hgvs_expr = self.get_hgvs_expr(classification, t, s, False)
107108
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
108109

110+
# MANE Select Transcript
111+
if hgvs_expr not in mane_transcripts_dict.keys():
112+
mane_transcripts_dict[hgvs_expr] = {
113+
'classification_token': s,
114+
'transcript_token': t
115+
}
116+
109117
if not allele:
110118
errors.append("Unable to find allele.")
111119
else:
112-
# MANE Select Transcript
113-
if hgvs_expr not in mane_transcripts_dict.keys():
114-
mane_transcripts_dict[hgvs_expr] = {
115-
'classification_token': s,
116-
'transcript_token': t
117-
}
118120
if len(allele['state']['sequence']) == 3:
119121
allele['state']['sequence'] = \
120122
self._amino_acid_cache.convert_three_to_one(
@@ -151,7 +153,7 @@ def check_ref_aa(self, t, aa, pos, errors):
151153
152154
:param string t: Transcript
153155
:param str aa: Expected Amino Acid
154-
:param str pos: Expected position
156+
:param int pos: Expected position
155157
:param list errors: List of errors
156158
"""
157159
ref_aa_del = \
@@ -174,9 +176,9 @@ def get_hgvs_expr(self, classification, t, s, is_hgvs) -> str:
174176
:param Classification classification: A classification for a list of
175177
tokens
176178
:param str t: Transcript retrieved from transcript mapping
179+
:param Token s: The classification token
177180
:param bool is_hgvs: Whether or not classification is HGVS token
178-
:return: A tuple containing the hgvs expression and whether or not
179-
it's an Ensembl Transcript
181+
:return: HGVS expression for the variant
180182
"""
181183
if not is_hgvs:
182184
prefix = f"{t}:{s.reference_sequence.lower()}."

variant/validators/amino_acid_delins.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,16 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
106106
hgvs_expr = self.get_hgvs_expr(classification, t, s, False)
107107
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
108108

109+
# MANE Select Transcript
110+
if hgvs_expr not in mane_transcripts_dict.keys():
111+
mane_transcripts_dict[hgvs_expr] = {
112+
'classification_token': s,
113+
'transcript_token': t
114+
}
115+
109116
if not allele:
110117
errors.append("Unable to find allele.")
111118
else:
112-
# MANE Select Transcript
113-
if hgvs_expr not in mane_transcripts_dict.keys():
114-
mane_transcripts_dict[hgvs_expr] = {
115-
'classification_token': s,
116-
'transcript_token': t
117-
}
118119
if len(allele['state']['sequence']) == 3:
119120
allele['state']['sequence'] = \
120121
self._amino_acid_cache.convert_three_to_one(
@@ -151,7 +152,7 @@ def check_ref_aa(self, t, aa, pos, errors):
151152
152153
:param string t: Transcript
153154
:param str aa: Expected Amino Acid
154-
:param str pos: Expected position
155+
:param int pos: Expected position
155156
:param list errors: List of errors
156157
"""
157158
ref_aa_del = \

variant/validators/coding_dna_deletion.py

Lines changed: 7 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
ClassificationType
55
from variant.schemas.token_response_schema import CodingDNADeletionToken
66
from variant.schemas.validation_response_schema import LookupType
7-
from typing import List, Tuple
7+
from typing import List
88
from variant.schemas.classification_response_schema import Classification
99
from variant.schemas.token_response_schema import GeneMatchToken
1010
from variant.schemas.validation_response_schema import ValidationResult
11-
from variant.schemas.token_response_schema import Token
1211
import logging
1312

1413

@@ -63,36 +62,6 @@ def validate(self, classification: Classification) \
6362
classification, results, gene_tokens)
6463
return results
6564

66-
def get_hgvs_expr(self, classification, t, s, is_hgvs) -> Tuple[str, bool]:
67-
"""Return HGVS expression and whether or not it's an Ensembl transcript
68-
69-
:param Classification classification: A classification for a list of
70-
tokens
71-
:param str t: Transcript retrieved from transcript mapping
72-
:param bool is_hgvs: Whether or not classification is HGVS token
73-
:return: A tuple containing the hgvs expression and whether or not
74-
it's an Ensembl Transcript
75-
"""
76-
if not is_hgvs:
77-
prefix = f"{t}:{s.reference_sequence.lower()}.{s.start_pos_del}"
78-
if s.end_pos_del:
79-
prefix += f"_{s.end_pos_del}"
80-
hgvs_expr = f"{prefix}del"
81-
if s.deleted_sequence:
82-
hgvs_expr += f"{s.deleted_sequence}"
83-
else:
84-
hgvs_token = [t for t in classification.all_tokens if
85-
isinstance(t, Token) and t.token_type == 'HGVS'][0]
86-
hgvs_expr = hgvs_token.input_string
87-
88-
gene_token = [t for t in classification.all_tokens
89-
if t.token_type == 'GeneSymbol']
90-
if gene_token:
91-
is_ensembl_transcript = True
92-
else:
93-
is_ensembl_transcript = False
94-
return hgvs_expr, is_ensembl_transcript
95-
9665
def get_valid_invalid_results(self, classification_tokens, transcripts,
9766
classification, results, gene_tokens) \
9867
-> None:
@@ -128,13 +97,13 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
12897
)
12998
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
13099

131-
if allele:
132-
mane_transcripts_dict[hgvs_expr] = {
133-
'classification_token': s,
134-
'transcript_token': t,
135-
'is_ensembl_transcript': is_ensembl_transcript
136-
}
100+
mane_transcripts_dict[hgvs_expr] = {
101+
'classification_token': s,
102+
'transcript_token': t,
103+
'is_ensembl_transcript': is_ensembl_transcript
104+
}
137105

106+
if allele:
138107
ref_sequence = self.get_reference_sequence(t, s, errors)
139108

140109
if ref_sequence and s.deleted_sequence:

variant/validators/coding_dna_delins.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,13 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
135135
)
136136
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
137137

138-
if allele:
139-
mane_transcripts_dict[hgvs_expr] = {
140-
'classification_token': s,
141-
'transcript_token': t,
142-
'is_ensembl_transcript': is_ensembl_transcript
143-
}
138+
mane_transcripts_dict[hgvs_expr] = {
139+
'classification_token': s,
140+
'transcript_token': t,
141+
'is_ensembl_transcript': is_ensembl_transcript
142+
}
144143

144+
if allele:
145145
len_of_seq = self.seqrepo_access.len_of_sequence(t)
146146
is_len_lt_end = len_of_seq < int(s.end_pos_del) - 1
147147
is_len_lt_start = \

variant/validators/coding_dna_substitution.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
129129

130130
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
131131

132-
if allele:
133-
mane_transcripts_dict[hgvs_expr] = {
134-
'classification_token': s,
135-
'transcript_token': t,
136-
'is_ensembl_transcript': is_ensembl_transcript
137-
}
132+
mane_transcripts_dict[hgvs_expr] = {
133+
'classification_token': s,
134+
'transcript_token': t,
135+
'is_ensembl_transcript': is_ensembl_transcript
136+
}
138137

138+
if allele:
139139
ref_nuc = \
140140
self.seqrepo_access.sequence_at_position(t, s.position)
141141
self.check_ref_nucleotide(ref_nuc, s, t, errors)

variant/validators/deletion_base.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""The module for Deletion Validation."""
2-
from abc import abstractmethod
3-
from typing import Optional
4-
2+
from typing import Optional, Tuple
3+
from variant.schemas.token_response_schema import Token
54
from variant.validators.validator import Validator
65
import logging
76

@@ -12,15 +11,36 @@
1211
class DeletionBase(Validator):
1312
"""The Deletion Validator Base class."""
1413

15-
@abstractmethod
16-
def get_hgvs_expr(self, classification, t, s, is_hgvs):
17-
"""Return a HGVS expression.
14+
def get_hgvs_expr(self, classification, t, s, is_hgvs) -> Tuple[str, bool]:
    """Build the HGVS deletion expression and report gene-symbol presence.

    When the input was not already an HGVS string, the expression is
    assembled as ``<t>:<ref-seq-type>.<start>[_<end>]del[<deleted seq>]``
    from the classification token. Otherwise the input string of the first
    HGVS token found in the classification is returned unchanged.

    :param Classification classification: A classification for a list of
        tokens
    :param str t: Transcript retrieved from transcript mapping
    :param Token s: The classification token
    :param bool is_hgvs: Whether or not classification is HGVS token
    :return: A tuple containing the hgvs expression and a flag that is
        ``True`` when the classification holds at least one GeneSymbol
        token (callers store it as ``is_ensembl_transcript``)
    """
    if is_hgvs:
        # The user supplied an HGVS string, so reuse it verbatim.
        # Indexing [0] raises IndexError when no HGVS token is present.
        hgvs_matches = [
            token for token in classification.all_tokens
            if isinstance(token, Token) and token.token_type == 'HGVS'
        ]
        hgvs_expr = hgvs_matches[0].input_string
    else:
        hgvs_expr = f"{t}:{s.reference_sequence.lower()}.{s.start_pos_del}"
        if s.end_pos_del:
            # A range deletion carries an explicit end position.
            hgvs_expr += f"_{s.end_pos_del}"
        hgvs_expr += "del"
        if s.deleted_sequence:
            hgvs_expr += f"{s.deleted_sequence}"

    gene_symbol_tokens = [
        token for token in classification.all_tokens
        if token.token_type == 'GeneSymbol'
    ]
    return hgvs_expr, bool(gene_symbol_tokens)
2444

2545
def get_reference_sequence(self, t, s, errors) -> Optional[str]:
2646
"""Get deleted reference sequence.

variant/validators/genomic_deletion.py

Lines changed: 6 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from variant.schemas.classification_response_schema import Classification
99
from variant.schemas.token_response_schema import GeneMatchToken
1010
from variant.schemas.validation_response_schema import ValidationResult
11-
from variant.schemas.token_response_schema import Token
1211
import logging
1312

1413

@@ -56,40 +55,6 @@ def validate(self, classification: Classification) \
5655
classification, results, gene_tokens)
5756
return results
5857

59-
def get_hgvs_expr(self, classification, t, s, is_hgvs) -> tuple:
60-
"""Return HGVS expression and whether or not it's an Ensembl transcript
61-
62-
:param Classification classification: A classification for a list of
63-
tokens
64-
:param str t: Transcript retrieved from transcript mapping
65-
:param bool is_hgvs: Whether or not classification is HGVS token
66-
:return: A tuple containing the hgvs expression and whether or not
67-
it's an Ensembl Transcript
68-
"""
69-
if t.startswith('ENST'):
70-
# TODO
71-
return None, True
72-
73-
if not is_hgvs:
74-
prefix = f"{t}:{s.reference_sequence.lower()}.{s.start_pos_del}"
75-
if s.end_pos_del:
76-
prefix += f"_{s.end_pos_del}"
77-
hgvs_expr = f"{prefix}del"
78-
if s.deleted_sequence:
79-
hgvs_expr += f"{s.deleted_sequence}"
80-
else:
81-
hgvs_token = [t for t in classification.all_tokens if
82-
isinstance(t, Token) and t.token_type == 'HGVS'][0]
83-
hgvs_expr = hgvs_token.input_string
84-
85-
gene_token = [t for t in classification.all_tokens
86-
if t.token_type == 'GeneSymbol']
87-
if gene_token:
88-
is_ensembl_transcript = True
89-
else:
90-
is_ensembl_transcript = False
91-
return hgvs_expr, is_ensembl_transcript
92-
9358
def get_valid_invalid_results(self, classification_tokens, transcripts,
9459
classification, results, gene_tokens) \
9560
-> None:
@@ -118,13 +83,13 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
11883
self.get_hgvs_expr(classification, t, s, False)
11984
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
12085

121-
if allele:
122-
mane_transcripts_dict[hgvs_expr] = {
123-
'classification_token': s,
124-
'transcript_token': t,
125-
'is_ensembl_transcript': is_ensembl_transcript
126-
}
86+
mane_transcripts_dict[hgvs_expr] = {
87+
'classification_token': s,
88+
'transcript_token': t,
89+
'is_ensembl_transcript': is_ensembl_transcript
90+
}
12791

92+
if allele:
12893
ref_sequence = self.get_reference_sequence(t, s, errors)
12994

13095
if ref_sequence and s.deleted_sequence:

variant/validators/genomic_delins.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,13 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
125125
self.get_hgvs_expr(classification, t, s, False)
126126
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
127127

128-
if allele:
129-
mane_transcripts_dict[hgvs_expr] = {
130-
'classification_token': s,
131-
'transcript_token': t,
132-
'is_ensembl_transcript': is_ensembl_transcript
133-
}
128+
mane_transcripts_dict[hgvs_expr] = {
129+
'classification_token': s,
130+
'transcript_token': t,
131+
'is_ensembl_transcript': is_ensembl_transcript
132+
}
134133

134+
if allele:
135135
len_of_seq = self.seqrepo_access.len_of_sequence(t)
136136
is_len_lt_end = len_of_seq < int(s.end_pos_del) - 1
137137
is_len_lt_start = \

variant/validators/genomic_substitution.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,11 @@ def get_valid_invalid_results(self, classification_tokens, transcripts,
107107
self.get_hgvs_expr(classification, t, s, False)
108108
allele = self.get_allele_from_hgvs(hgvs_expr, errors)
109109

110-
if allele:
111-
mane_transcripts_dict[hgvs_expr] = {
112-
'classification_token': s,
113-
'transcript_token': t,
114-
'is_ensembl_transcript': is_ensembl_transcript
115-
}
110+
mane_transcripts_dict[hgvs_expr] = {
111+
'classification_token': s,
112+
'transcript_token': t,
113+
'is_ensembl_transcript': is_ensembl_transcript
114+
}
116115

117116
self.check_ref_nucleotide(ref_nuc, s, t, errors)
118117
self.add_validation_result(

0 commit comments

Comments
 (0)