Skip to content

Commit d41e9ee

Browse files
authored
Merge pull request #138 from cancervariants/issue-114
Issue 121
2 parents 45db18f + 3ca62ca commit d41e9ee

File tree

70 files changed

+548
-535
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+548
-535
lines changed

.ebextensions/01_download_data.config

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@ container_commands:
1010
test: test ! -d "./variation/data/seqrepo"
1111
command: "yum install -y unzip"
1212

13-
04_hgnc_gene_symbol_download:
14-
test: test ! -f "./variation/data/hgnc_gene_symbols.txt"
15-
command: "aws s3 cp s3://${AWS_BUCKET_NAME}/variation/hgnc_gene_symbols.txt ./variation/data/hgnc_gene_symbols.txt --region us-east-2"
16-
1713
05_refseq_gene_symbol_download:
1814
test: test ! -f "./variation/data/refseq_gene_symbols.txt"
1915
command: "aws s3 cp s3://${AWS_BUCKET_NAME}/variation/refseq_gene_symbols.txt ./variation/data/refseq_gene_symbols.txt --region us-east-2"

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Variation Normalization uses [Ensembl BioMart](http://www.ensembl.org/biomart/ma
7171
![image](biomart.png)
7272
7373
### Setting up Gene Normalizer
74-
Variation Normalization `normalize` endpoint relies on data from [Gene Normalization](https://github.com/cancervariants/gene-normalization). You must have Gene Normalization's DynamoDB running for the `normalize` endpoint to work.
74+
Variation Normalization relies on data from [Gene Normalization](https://github.com/cancervariants/gene-normalization). You must have Gene Normalization's DynamoDB running for the application to work.
7575
7676
To set up, follow the instructions from the [README](https://github.com/cancervariants/gene-normalization/blob/main/README.md).
7777

tests/classifiers/classifier_base.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import yaml
33
from variation.tokenizers import Tokenize
44
from tests import PROJECT_ROOT
5+
from variation.tokenizers.caches import AminoAcidCache
6+
from variation.tokenizers import GeneSymbol
7+
from gene.query import QueryHandler as GeneQueryHandler
58

69

710
class ClassifierBase:
@@ -16,7 +19,8 @@ def setUp(self):
1619
{'should_match': [], 'should_not_match': []}
1720
)
1821
self.classifier = self.classifier_instance()
19-
self.tokenizer = Tokenize()
22+
self.tokenizer = Tokenize(AminoAcidCache(),
23+
GeneSymbol(GeneQueryHandler()))
2024

2125
def classifier_instance(self):
2226
"""Check that the classifier_instance method is implemented."""

tests/classifiers/test_fusion.py

Lines changed: 0 additions & 16 deletions
This file was deleted.

tests/fixtures/validators.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ coding_dna_delins:
9797
should_not_match:
9898
- query: NM_005228:c.2237_2253delinsTTGCT
9999
- query: ENST00000277541.6:c.7330479587395delinsACA
100-
- query: NM_000551.3:c.4560delinsAA
101-
- query: NM_000551.3:c.4559_4560delinsAA
100+
- query: NM_000551.3:c.4561delinsAA
101+
- query: NM_000551.3:c.4561_4562delinsAA
102102
- query: NM_000551.3:c.4560_4561delinsAA
103103

104104
genomic_delins:
@@ -182,4 +182,4 @@ genomic_insertion:
182182
- query: ERBB2 g.37880993_37880994insGCTTACGTGATG
183183
should_not_match:
184184
- query: NC_000022.10:g.51304566_51304567insT
185-
- query: NC_000022.10:g.51304565_51304566insT
185+
- query: NC_000022.10:g.51304567_51304568insT

tests/test_normalize.py

Lines changed: 8 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,10 @@
11
"""Module for testing the normalize endpoint."""
22
import pytest
3-
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
4-
from ga4gh.vrs.extras.translator import Translator
5-
from variation.normalize import Normalize
3+
from variation.query import QueryHandler
64
from variation.schemas.ga4gh_vrsatile import VariationDescriptor
7-
from variation.to_vrs import ToVRS
8-
from variation.main import normalize as normalize_get_response
9-
from variation.main import translate as to_vrs_get_response
10-
from variation.classifiers import Classify
11-
from variation.tokenizers import Tokenize
12-
from variation.validators import Validate
13-
from variation.translators import Translate
14-
from variation.data_sources import SeqRepoAccess, TranscriptMappings, \
15-
UTA, MANETranscriptMappings
16-
from variation.mane_transcript import MANETranscript
17-
from variation.tokenizers import GeneSymbol
18-
from variation.tokenizers.caches import GeneSymbolCache, AminoAcidCache
195
from datetime import datetime
6+
from variation.main import normalize as normalize_get_response
7+
from variation.main import to_vrs as to_vrs_get_response
208
import copy
219

2210

@@ -26,40 +14,13 @@ def test_normalize():
2614
class TestNormalize:
2715

2816
def __init__(self):
29-
tokenizer = Tokenize()
30-
classifier = Classify()
31-
seqrepo_access = SeqRepoAccess()
32-
transcript_mappings = TranscriptMappings()
33-
gene_symbol = GeneSymbol(GeneSymbolCache())
34-
amino_acid_cache = AminoAcidCache()
35-
uta = UTA()
36-
mane_transcript_mappings = MANETranscriptMappings()
37-
dp = SeqRepoDataProxy(seqrepo_access.seq_repo_client)
38-
tlr = Translator(data_proxy=dp)
39-
mane_transcript = MANETranscript(seqrepo_access,
40-
transcript_mappings,
41-
mane_transcript_mappings, uta)
42-
validator = Validate(seqrepo_access, transcript_mappings,
43-
gene_symbol, mane_transcript, uta,
44-
dp, tlr, amino_acid_cache)
45-
translator = Translate()
46-
47-
self.to_vrs = ToVRS(tokenizer, classifier, seqrepo_access,
48-
transcript_mappings, gene_symbol,
49-
amino_acid_cache, uta,
50-
mane_transcript_mappings, mane_transcript,
51-
validator, translator)
52-
self.test_normalize = Normalize(seqrepo_access, uta)
17+
self.query_handler = QueryHandler()
18+
19+
def to_vrs(self, q):
20+
return self.query_handler.to_vrs(q)
5321

5422
def normalize(self, q):
55-
validations, warnings = self.to_vrs.get_validations(
56-
q, normalize_endpoint=True
57-
)
58-
resp = \
59-
self.test_normalize.normalize(q,
60-
validations,
61-
warnings)
62-
return resp
23+
return self.query_handler.normalize(q)
6324

6425
return TestNormalize()
6526

tests/tokenizers/test_gene.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
"""A module for testing the Gene Pair Tokenizer."""
22
import unittest
33
from variation.tokenizers import GeneSymbol
4-
from variation.tokenizers.caches import GeneSymbolCache
54
from .tokenizer_base import TokenizerBase
5+
from gene.query import QueryHandler as GeneQueryHandler
66

77

88
class TestGenePairTokenizer(TokenizerBase, unittest.TestCase):
99
"""The Gene Pair Tokenizer class."""
1010

1111
def tokenizer_instance(self):
1212
"""Return the Gene Pair tokenizer instance."""
13-
gene_cache = GeneSymbolCache()
14-
return GeneSymbol(gene_cache)
13+
return GeneSymbol(GeneQueryHandler())
1514

1615
def token_type(self):
1716
"""Return the Gene Pair token type."""

tests/tokenizers/test_gene_pair.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Module for testing gene pair tokenization."""
22
import unittest
33
from variation.tokenizers import GenePair
4-
from variation.tokenizers.caches import GeneSymbolCache
54
from .tokenizer_base import TokenizerBase
65

76

@@ -10,7 +9,7 @@ class TestGenePairTokenizer(TokenizerBase, unittest.TestCase):
109

1110
def tokenizer_instance(self):
1211
"""Return Gene Pair Tokenizer instance."""
13-
return GenePair(GeneSymbolCache())
12+
return GenePair()
1413

1514
def token_type(self):
1615
"""Return Gene Pair token type."""

tests/translators/test_amino_acid_deletion.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
from variation.validators import AminoAcidDeletion as AAD_V
66
from .translator_base import TranslatorBase
77
from variation.tokenizers import GeneSymbol
8-
from variation.tokenizers.caches import GeneSymbolCache, AminoAcidCache
8+
from variation.tokenizers.caches import AminoAcidCache
99
from variation.data_sources import TranscriptMappings, SeqRepoAccess, \
1010
MANETranscriptMappings, UTA
1111
from variation.mane_transcript import MANETranscript
1212
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
1313
from ga4gh.vrs.extras.translator import Translator
14+
from gene.query import QueryHandler as GeneQueryHandler
1415

1516

1617
class TestAminoAcidDeletionTranslator(TranslatorBase, unittest.TestCase):
@@ -28,7 +29,8 @@ def validator_instance(self):
2829
dp = SeqRepoDataProxy(seqrepo_access.seq_repo_client)
2930
tlr = Translator(data_proxy=dp)
3031
return AAD_V(
31-
seqrepo_access, transcript_mappings, GeneSymbol(GeneSymbolCache()),
32+
seqrepo_access, transcript_mappings,
33+
GeneSymbol(GeneQueryHandler()),
3234
MANETranscript(seqrepo_access, transcript_mappings,
3335
MANETranscriptMappings(), uta),
3436
uta, dp, tlr, AminoAcidCache())

tests/translators/test_amino_acid_delins.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
from variation.validators import AminoAcidDelIns as AAD_V
66
from .translator_base import TranslatorBase
77
from variation.tokenizers import GeneSymbol
8-
from variation.tokenizers.caches import GeneSymbolCache, AminoAcidCache
8+
from variation.tokenizers.caches import AminoAcidCache
99
from variation.data_sources import TranscriptMappings, SeqRepoAccess, \
1010
MANETranscriptMappings, UTA
1111
from variation.mane_transcript import MANETranscript
1212
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
1313
from ga4gh.vrs.extras.translator import Translator
14+
from gene.query import QueryHandler as GeneQueryHandler
1415

1516

1617
class TestAminoAcidDelInsTranslator(TranslatorBase, unittest.TestCase):
@@ -28,7 +29,8 @@ def validator_instance(self):
2829
dp = SeqRepoDataProxy(seqrepo_access.seq_repo_client)
2930
tlr = Translator(data_proxy=dp)
3031
return AAD_V(
31-
seqrepo_access, transcript_mappings, GeneSymbol(GeneSymbolCache()),
32+
seqrepo_access, transcript_mappings,
33+
GeneSymbol(GeneQueryHandler()),
3234
MANETranscript(seqrepo_access, transcript_mappings,
3335
MANETranscriptMappings(), uta),
3436
uta, dp, tlr, AminoAcidCache())

0 commit comments

Comments
 (0)