Skip to content

Commit a3f9b1d

Browse files
author
Koeng101
authored
Add support for Gz'd flat files (#79)
* Adds func for reading multi and flat genbank files * Fixed comment problems * Reads, not parses * Added feature for Sequences from Gzipped flat files * Fixed error handling
1 parent bb702bc commit a3f9b1d

File tree

3 files changed

+31
-30
lines changed

3 files changed

+31
-30
lines changed

data/flatGbk_test.seq.gz

4.16 KB
Binary file not shown.

io.go

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package poly
22

33
import (
44
"bytes"
5+
"compress/gzip"
56
"encoding/json"
67
"io/ioutil"
78
"log"
@@ -325,13 +326,9 @@ func BuildGff(sequence Sequence) []byte {
325326

326327
// ReadGff takes in a filepath for a .gffv3 file and parses it into an Annotated Sequence struct.
327328
func ReadGff(path string) Sequence {
328-
file, err := ioutil.ReadFile(path)
329+
file, _ := ioutil.ReadFile(path)
329330
var sequence Sequence
330-
if err != nil {
331-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
332-
} else {
333-
sequence = ParseGff(file)
334-
}
331+
sequence = ParseGff(file)
335332
return sequence
336333
}
337334

@@ -368,10 +365,7 @@ func ParseJSON(file []byte) Sequence {
368365

369366
// ReadJSON reads a Sequence JSON file.
370367
func ReadJSON(path string) Sequence {
371-
file, err := ioutil.ReadFile(path)
372-
if err != nil {
373-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
374-
}
368+
file, _ := ioutil.ReadFile(path)
375369
sequence := ParseJSON(file)
376370
return sequence
377371
}
@@ -499,10 +493,7 @@ func BuildFASTA(sequence Sequence) []byte {
499493

500494
// ReadFASTA reads a Sequence struct from a FASTA file.
501495
func ReadFASTA(path string) Sequence {
502-
file, err := ioutil.ReadFile(path)
503-
if err != nil {
504-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
505-
}
496+
file, _ := ioutil.ReadFile(path)
506497
sequence := ParseFASTA(file)
507498
return sequence
508499
}
@@ -715,14 +706,9 @@ func BuildGbk(sequence Sequence) []byte {
715706

716707
// ReadGbk reads a Gbk from path and parses into an Annotated sequence struct.
717708
func ReadGbk(path string) Sequence {
718-
file, err := ioutil.ReadFile(path)
709+
file, _ := ioutil.ReadFile(path)
719710
var sequence Sequence
720-
if err != nil {
721-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
722-
} else {
723-
sequence = ParseGbk(file)
724-
725-
}
711+
sequence = ParseGbk(file)
726712
return sequence
727713
}
728714

@@ -1451,24 +1437,28 @@ func ParseGbkFlat(file []byte) []Sequence {
14511437

14521438
// ReadGbkMulti reads multiple genbank files from a single file
14531439
func ReadGbkMulti(path string) []Sequence {
1454-
file, err := ioutil.ReadFile(path)
1455-
if err != nil {
1456-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
1457-
}
1440+
file, _ := ioutil.ReadFile(path)
14581441
sequences := ParseGbkMulti(file)
14591442
return sequences
14601443
}
14611444

1462-
// ReadGbkFlat reads flat genbank files, like the ones provided by the NCBI FTP server
1445+
// ReadGbkFlat reads flat genbank files, like the ones provided by the NCBI FTP server (after decompression)
14631446
func ReadGbkFlat(path string) []Sequence {
1464-
file, err := ioutil.ReadFile(path)
1465-
if err != nil {
1466-
// return 0, fmt.Errorf("Failed to open file %s for unpack: %s", gzFilePath, err)
1467-
}
1447+
file, _ := ioutil.ReadFile(path)
14681448
sequences := ParseGbkFlat(file)
14691449
return sequences
14701450
}
14711451

1452+
// ReadGbkFlatGz reads flat gzip'd genbank files, like the ones provided by the NCBI FTP server
1453+
func ReadGbkFlatGz(path string) []Sequence {
1454+
file, _ := ioutil.ReadFile(path)
1455+
rdata := bytes.NewReader(file)
1456+
r, _ := gzip.NewReader(rdata)
1457+
s, _ := ioutil.ReadAll(r)
1458+
sequences := ParseGbkFlat(s)
1459+
return sequences
1460+
}
1461+
14721462
/******************************************************************************
14731463
14741464
Genbank Flat specific IO related things end here.

io_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,17 @@ func ExampleReadGbkFlat() {
429429
// Output: AB000100, AB000106
430430
}
431431

432+
func ExampleReadGbkFlatGz() {
433+
sequences := ReadGbkFlatGz("data/flatGbk_test.seq.gz")
434+
var locus []string
435+
for _, sequence := range sequences {
436+
locus = append(locus, sequence.Meta.Locus.Name)
437+
}
438+
439+
fmt.Println(strings.Join(locus, ", "))
440+
// Output: AB000100, AB000106
441+
}
442+
432443
func ExampleParseGbkMulti() {
433444
file, _ := ioutil.ReadFile("data/multiGbk_test.seq")
434445
sequences := ParseGbkMulti(file)

0 commit comments

Comments
 (0)