Skip to content

Commit f2310db

Browse files
created parser regression test for nested sublocations. (#385)
* created parser regression test for nested sublocations. * integrating @abondrn's fix to genbank parser. * fixed lint issues. * added data for regression test. Co-authored-by: Alex <[email protected]> Co-authored-by: Alex <[email protected]>
1 parent 5bd6a5d commit f2310db

File tree

3 files changed

+50
-14
lines changed

3 files changed

+50
-14
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"start":0,"end":0,"complement":false,"join":true,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":[{"start":5306941,"end":5307394,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5304400,"end":5305029,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5303327,"end":5303393,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null},{"start":5301927,"end":5302004,"complement":true,"join":false,"five_prime_partial":false,"three_prime_partial":false,"gbk_location_string":"join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))","sub_locations":null}]}

io/genbank/genbank.go

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,8 @@ func getSourceOrganism(metadataData []string) (string, string, []string) {
811811
func parseLocation(locationString string) (Location, error) {
812812
var location Location
813813
location.GbkLocationString = locationString
814-
if !(strings.ContainsAny(locationString, "(")) { // Case checks for simple expression of x..x
815-
if !(strings.ContainsAny(locationString, ".")) { //Case checks for simple expression x
814+
if !strings.ContainsAny(locationString, "(") { // Case checks for simple expression of x..x
815+
if !strings.ContainsAny(locationString, ".") { //Case checks for simple expression x
816816
position, err := strconv.Atoi(locationString)
817817
if err != nil {
818818
return Location{}, err
@@ -841,26 +841,34 @@ func parseLocation(locationString string) (Location, error) {
841841
if strings.ContainsAny(expression, "(") {
842842
firstInnerParentheses := strings.Index(expression, "(")
843843
ParenthesesCount := 1
844-
comma := 0
845-
for i := 1; ParenthesesCount > 0; i++ { // "(" is at 0, so we start at 1
846-
comma = i
847-
switch expression[firstInnerParentheses+i] {
848-
case []byte("(")[0]:
844+
prevSubLocationStart := 0
845+
for i := firstInnerParentheses + 1; i < len(expression); i++ { // the first "(" is at firstInnerParentheses, so scanning starts one character past it
846+
switch expression[i] {
847+
case '(':
849848
ParenthesesCount++
850-
case []byte(")")[0]:
849+
case ')':
851850
ParenthesesCount--
851+
case ',':
852+
if ParenthesesCount == 0 {
853+
parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:i])
854+
if err != nil {
855+
return Location{}, err
856+
}
857+
parsedSubLocation.GbkLocationString = locationString
858+
location.SubLocations = append(location.SubLocations, parsedSubLocation)
859+
prevSubLocationStart = i + 1
860+
}
852861
}
853862
}
854-
parseLeftLocation, err := parseLocation(expression[:firstInnerParentheses+comma+1])
855-
if err != nil {
856-
return Location{}, err
863+
if ParenthesesCount != 0 {
864+
return Location{}, fmt.Errorf("Unbalanced parentheses")
857865
}
858-
parseRightLocation, err := parseLocation(expression[2+firstInnerParentheses+comma:])
866+
parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:])
859867
if err != nil {
860868
return Location{}, err
861869
}
862-
863-
location.SubLocations = append(location.SubLocations, parseLeftLocation, parseRightLocation)
870+
parsedSubLocation.GbkLocationString = locationString
871+
location.SubLocations = append(location.SubLocations, parsedSubLocation)
864872
} else { // This is the default join(x..x,x..x)
865873
for _, numberRange := range strings.Split(expression, ",") {
866874
joinLocation, err := parseLocation(numberRange)

io/genbank/genbank_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package genbank
22

33
import (
4+
"encoding/json"
45
"errors"
6+
"fmt"
57
"io"
68
"os"
79
"path/filepath"
@@ -160,6 +162,31 @@ func TestPartialLocationParseRegression(t *testing.T) {
160162
}
161163
}
162164

165+
func TestSubLocationStringParseRegression(t *testing.T) {
166+
location := "join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))"
167+
parsedLocation, err := parseLocation(location)
168+
if err != nil {
169+
t.Errorf("Failed to parse location string. Got err: %s", err)
170+
}
171+
jsonFile, err := os.Open("../../data/parseLocationRegressionTest.json")
172+
// if os.Open returns an error then handle it
173+
if err != nil {
174+
fmt.Println(err)
175+
}
176+
defer jsonFile.Close()
177+
178+
byteValue, _ := io.ReadAll(jsonFile)
179+
var testParsedLocation Location
180+
err = json.Unmarshal(byteValue, &testParsedLocation)
181+
if err != nil {
182+
t.Errorf("Failed to unmarshal json. Got err: %s", err)
183+
}
184+
185+
if diff := cmp.Diff(parsedLocation, testParsedLocation); diff != "" {
186+
t.Errorf("Failed to parse sublocation string. Got this diff:\n%s", diff)
187+
}
188+
}
189+
163190
func TestSnapgeneGenbankRegression(t *testing.T) {
164191
snapgene, err := Read("../../data/puc19_snapgene.gb")
165192

0 commit comments

Comments
 (0)