Skip to content

Commit 55abc5f

Browse files
committed
Now it is possible to merge multiple VCF files
1 parent b93fd48 commit 55abc5f

File tree

1 file changed

+108
-70
lines changed
  • matrix_table_consumer/functions_go

1 file changed

+108
-70
lines changed

matrix_table_consumer/functions_go/merge.go

Lines changed: 108 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -70,77 +70,115 @@ func parseVCFLine(line string, sampleNames []string) *VCFRecordWithSamples {
7070
}
7171

7272
// readVCFHeaders reads headers from every file in vcf_files, or from vcf1 and vcf2 when vcf_files is empty
73-
func readVCFHeaders(vcf1, vcf2 string) ([]string, error) {
73+
func readVCFHeaders(vcf1, vcf2 string, vcf_files []string) ([]string, error) {
7474
headers := make([]string, 0)
7575
samplesNames := make([]string, 0)
7676
var endHeaderWithoutSamples []string
7777

78-
// Reading the first file
79-
file1, err := os.Open(vcf1)
80-
if err != nil {
81-
return nil, err
82-
}
83-
defer file1.Close()
78+
if len(vcf_files) > 0 {
79+
for _, vcf_path := range vcf_files {
80+
file, err := os.Open(vcf_path)
81+
if err != nil {
82+
return nil, err
83+
}
84+
defer file.Close()
8485

85-
var reader1 *bufio.Reader
86-
if strings.HasSuffix(vcf1, ".gz") {
87-
gr, err := gzip.NewReader(file1)
88-
if err != nil {
89-
s := fmt.Sprintf("Error creating gzip reader: %v\n", err)
90-
LoggerError(s)
86+
var reader *bufio.Reader
87+
if strings.HasSuffix(vcf1, ".gz") {
88+
gr, err := gzip.NewReader(file)
89+
if err != nil {
90+
s := fmt.Sprintf("Error creating gzip reader: %v\n", err)
91+
LoggerError(s)
92+
}
93+
defer gr.Close()
94+
reader = bufio.NewReader(gr)
95+
} else {
96+
reader = bufio.NewReader(file)
97+
}
98+
99+
scanner := GetScaner(reader)
100+
for scanner.Scan() {
101+
line := scanner.Text()
102+
if strings.HasPrefix(line, "##") {
103+
if !contains(headers, line) {
104+
headers = append(headers, line)
105+
}
106+
} else if strings.HasPrefix(line, "#CHROM") {
107+
headerEnd := strings.Split(strings.TrimSpace(line), "\t")
108+
endHeaderWithoutSamples = headerEnd[:9]
109+
samplesNames = append(samplesNames, headerEnd[9:]...)
110+
break
111+
}
112+
}
91113
}
92-
defer gr.Close()
93-
reader1 = bufio.NewReader(gr)
94114
} else {
95-
reader1 = bufio.NewReader(file1)
96-
}
115+
// Reading the first file
116+
file1, err := os.Open(vcf1)
117+
if err != nil {
118+
return nil, err
119+
}
120+
defer file1.Close()
97121

98-
scanner1 := GetScaner(reader1)
99-
for scanner1.Scan() {
100-
line := scanner1.Text()
101-
if strings.HasPrefix(line, "##") {
102-
if !contains(headers, line) {
103-
headers = append(headers, line)
122+
var reader1 *bufio.Reader
123+
if strings.HasSuffix(vcf1, ".gz") {
124+
gr, err := gzip.NewReader(file1)
125+
if err != nil {
126+
s := fmt.Sprintf("Error creating gzip reader: %v\n", err)
127+
LoggerError(s)
104128
}
105-
} else if strings.HasPrefix(line, "#CHROM") {
106-
headerEnd := strings.Split(strings.TrimSpace(line), "\t")
107-
endHeaderWithoutSamples = headerEnd[:9]
108-
samplesNames = append(samplesNames, headerEnd[9:]...)
109-
break
129+
defer gr.Close()
130+
reader1 = bufio.NewReader(gr)
131+
} else {
132+
reader1 = bufio.NewReader(file1)
110133
}
111-
}
112134

113-
// Reading the second file
114-
file2, err := os.Open(vcf2)
115-
if err != nil {
116-
return nil, err
117-
}
118-
defer file2.Close()
135+
scanner1 := GetScaner(reader1)
136+
for scanner1.Scan() {
137+
line := scanner1.Text()
138+
if strings.HasPrefix(line, "##") {
139+
if !contains(headers, line) {
140+
headers = append(headers, line)
141+
}
142+
} else if strings.HasPrefix(line, "#CHROM") {
143+
headerEnd := strings.Split(strings.TrimSpace(line), "\t")
144+
endHeaderWithoutSamples = headerEnd[:9]
145+
samplesNames = append(samplesNames, headerEnd[9:]...)
146+
break
147+
}
148+
}
119149

120-
var reader2 *bufio.Reader
121-
if strings.HasSuffix(vcf2, ".gz") {
122-
gr, err := gzip.NewReader(file2)
150+
// Reading the second file
151+
file2, err := os.Open(vcf2)
123152
if err != nil {
124-
s := fmt.Sprintf("Error creating gzip reader: %v\n", err)
125-
LoggerError(s)
153+
return nil, err
126154
}
127-
defer gr.Close()
128-
reader2 = bufio.NewReader(gr)
129-
} else {
130-
reader2 = bufio.NewReader(file2)
131-
}
155+
defer file2.Close()
132156

133-
scanner2 := GetScaner(reader2)
134-
for scanner2.Scan() {
135-
line := scanner2.Text()
136-
if strings.HasPrefix(line, "##") {
137-
if !contains(headers, line) {
138-
headers = append(headers, line)
157+
var reader2 *bufio.Reader
158+
if strings.HasSuffix(vcf2, ".gz") {
159+
gr, err := gzip.NewReader(file2)
160+
if err != nil {
161+
s := fmt.Sprintf("Error creating gzip reader: %v\n", err)
162+
LoggerError(s)
163+
}
164+
defer gr.Close()
165+
reader2 = bufio.NewReader(gr)
166+
} else {
167+
reader2 = bufio.NewReader(file2)
168+
}
169+
170+
scanner2 := GetScaner(reader2)
171+
for scanner2.Scan() {
172+
line := scanner2.Text()
173+
if strings.HasPrefix(line, "##") {
174+
if !contains(headers, line) {
175+
headers = append(headers, line)
176+
}
177+
} else if strings.HasPrefix(line, "#CHROM") {
178+
headerEnd := strings.Split(strings.TrimSpace(line), "\t")
179+
samplesNames = append(samplesNames, headerEnd[9:]...)
180+
break
139181
}
140-
} else if strings.HasPrefix(line, "#CHROM") {
141-
headerEnd := strings.Split(strings.TrimSpace(line), "\t")
142-
samplesNames = append(samplesNames, headerEnd[9:]...)
143-
break
144182
}
145183
}
146184

@@ -407,21 +445,6 @@ func writeMergedRecord(record *VCFRecordWithSamples, samplesOrdered []string, ou
407445

408446
// Merge combines two VCF files
409447
func Merge(vcf1, vcf2, outputVCF, file_with_vcfs string) {
410-
headers, err := readVCFHeaders(vcf1, vcf2)
411-
if err != nil {
412-
s := fmt.Sprintf("Error: %v\n", err)
413-
LoggerError(s)
414-
}
415-
416-
LoggerInfo("Writing headers...\n")
417-
418-
if err := writeHeaders(headers, outputVCF); err != nil {
419-
s := fmt.Sprintf("Error: %v\n", err)
420-
LoggerError(s)
421-
}
422-
423-
LoggerInfo("Reading VCFs...\n")
424-
425448
var vcf_files []string
426449
if file_with_vcfs != "." {
427450
f, err := os.Open(file_with_vcfs)
@@ -447,6 +470,21 @@ func Merge(vcf1, vcf2, outputVCF, file_with_vcfs string) {
447470
}
448471
}
449472

473+
headers, err := readVCFHeaders(vcf1, vcf2, vcf_files)
474+
if err != nil {
475+
s := fmt.Sprintf("Error: %v\n", err)
476+
LoggerError(s)
477+
}
478+
479+
LoggerInfo("Writing headers...\n")
480+
481+
if err := writeHeaders(headers, outputVCF); err != nil {
482+
s := fmt.Sprintf("Error: %v\n", err)
483+
LoggerError(s)
484+
}
485+
486+
LoggerInfo("Reading VCFs...\n")
487+
450488
records, samplesList, err := readVCFs(vcf1, vcf2, vcf_files)
451489
if err != nil {
452490
s := fmt.Sprintf("Error: %v\n", err)

0 commit comments

Comments
 (0)