Skip to content

Parsing of FASTA header into name and description differs from ADAM #2

@heuermh

Description

@heuermh
scala> import org.biojava.nbio.adam.BiojavaAdamContext
scala> val biojavaContext = new BiojavaAdamContext(sc)

scala> val dna = sc.loadBiojavaFastaDna("../adam/adam-core/src/test/resources/HLA_DQB1_05_01_01_02.fa")
scala> val dna = biojavaContext.loadBiojavaFastaDna("../adam/adam-core/src/test/resources/HLA_DQB1_05_01_01_02.fa")

scala> dna.rdd.first

res6: org.bdgenomics.formats.avro.Sequence = {
"name": "HLA-DQB1*05:01:01:02\tHLA06615 7090 bp", "description": null,
"alphabet": "DNA", "sequence": "TTCTAAGACCTTTGCTCTTCTCCCCAGGACTTAAGGCTCTTCAGCGTGTCTAAGACAACAGCAGTAAAAATTTCTGTGACAGCAATTTTCTCTCCCCTGAAATATGATCCCCACCTAATTTGCTTTATTAAAAATCCCAAGTATAATAACAACTGGTTTTTAACAATATTACAGAGATGTTTACTGTTGAATTGCATTTTTCTTTTGCCTCTCAAAATCCCTGAGGAATTTGTTCTTCAGCTCTTCTATAATCGAGAGGAAATTTTCACCTCAGATGTTCATCCAGTGCAATTGGAAGACGTCACAGTGCCAGGCACTGGATTGAGAACCTTCACAAAAAAAAATGTCTGCCCAGAGACAGATGAGGTCCTTCAGCTCCAGTGCTGATTGGTTCTTTTCCAAGCGACCATCCAATCCTGCCACGCACGGAAACATCCACAGGTTTTTATTCTTTCTGCCAGGTACATCAGATCCATCAGGTCTGAGCTGTGTTGACTACCACTACTTTTCCCTTCGTCTCAATTATGTCTTGGAAGAAGTCTTTGCGGATCCCCGGAGACCTTCGGGTAGCAACTGTCACCTTGATGCTGGCGATCCTGAGCTCCTCACTGGCTGAGGGCAGAGACTCTCCCGGTAAGTGCAGGAAAG...

scala> val dna = biojavaContext.loadFastaDna("../adam/adam-core/src/test/resources/HLA_DQB1_05_01_01_02.fa")

scala> dna.rdd.first

res8: org.bdgenomics.formats.avro.Sequence = {
"name": "HLA-DQB1*05:01:01:02", "description": "HLA06615 7090 bp",
"alphabet": "DNA", "sequence": "TTCTAAGACCTTTGCTCTTCTCCCCAGGACTTAAGGCTCTTCAGCGTGTCTAAGACAACAGCAGTAAAAATTTCTGTGACAGCAATTTTCTCTCCCCTGAAATATGATCCCCACCTAATTTGCTTTATTAAAAATCCCAAGTATAATAACAACTGGTTTTTAACAATATTACAGAGATGTTTACTGTTGAATTGCATTTTTCTTTTGCCTCTCAAAATCCCTGAGGAATTTGTTCTTCAGCTCTTCTATAATCGAGAGGAAATTTTCACCTCAGATGTTCATCCAGTGCAATTGGAAGACGTCACAGTGCCAGGCACTGGATTGAGAACCTTCACAAAAAAAAATGTCTGCCCAGAGACAGATGAGGTCCTTCAGCTCCAGTGCTGATTGGTTCTTTTCCAAGCGACCATCCAATCCTGCCACGCACGGAAACATCCACAGGTTTTTATTCTTTCTGCCAGGTACATCAGATCCATCAGGTCTGAGCTGTGTTGACTACCACTACTTTTCCCTTCGTCTCAATTATGTCTTGGAAGAAGTCTTTGCGGATCCCCGGAGACCTTCGGGTAGCAACTGTCACCTTGATGCTGGCGATCCTGAGCTCCTCACTGGCTGAGGGCAGAGACTCTCCCGGTAAGTGCAGGAAAGCTGC...

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions