Skip to content

Commit 5cff2de

Browse files
committed
Add options --samples-list and --samples-file
This is to allow renaming samples from a list of samples given on the command line, rather than from a file of sample names. Unfortunately, the existing option `-s, --samples` conflicts with the rest of bcftools, therefore this is added as:
    -n, --samples-list LIST     New sample names given as a comma-separated list
    -N, --samples-file FILE     New sample names in a file, see the man page for details
The old option remains valid but is not advertised in the usage page. Resolves #2383
1 parent 38e0139 commit 5cff2de

File tree

4 files changed

+38
-18
lines changed

4 files changed

+38
-18
lines changed

NEWS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
## Release a.b
22

3+
4+
Changes affecting specific commands:
5+
6+
* bcftools reheader
7+
8+
- Add options `--samples-list` and `--samples-file` to allow renaming samples from a list of
9+
samples on the command line, rather than from a file of sample names (#2383)
10+
11+
312
## Release 1.22 (30th May 2025)
413

514

doc/bcftools.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3325,17 +3325,21 @@ Modify header of VCF/BCF files, change sample names.
33253325
*-h, --header* 'FILE'::
33263326
new VCF header
33273327

3328-
*-o, --output* 'FILE'::
3329-
see *<<common_options,Common Options>>*
3328+
*-n, --samples-list* 'LIST'::
3329+
comma-separated list of new sample names
33303330

33313331
*-s, --samples* 'FILE'::
3332+
*-N, --samples-file* 'FILE'::
33323333
new sample names, one name per line, in the same order as they appear
33333334
in the VCF file. Alternatively, only samples which need to be renamed
33343335
can be listed as "old_name new_name\n" pairs separated by whitespaces,
33353336
each on a separate line. If a sample name contains spaces, the
33363337
spaces can be escaped using the backslash character, for example
33373338
"Not\ a\ good\ sample\ name".
33383339

3340+
*-o, --output* 'FILE'::
3341+
see *<<common_options,Common Options>>*
3342+
33393343
*-T, --temp-prefix* 'PATH'::
33403344
this option is ignored, but left for compatibility with earlier versions of bcftools.
33413345

reheader.c

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ THE SOFTWARE. */
4949
typedef struct _args_t
5050
{
5151
char **argv, *fname, *samples_fname, *header_fname, *output_fname;
52+
int samples_is_file;
5253
char *fai_fname;
5354
htsFile *fp;
5455
faidx_t *fai;
@@ -204,6 +205,14 @@ static void update_from_fai(faidx_t *fai, kstring_t *hdr_txt)
204205
khash_str2int_destroy_free(chr_seen);
205206
}
206207

208+
/*
 *  Read the new sample names by delegating to hts_readlist().
 *
 *  fname     .. file name when is_file is non-zero, otherwise the
 *               comma-separated list itself
 *  is_file   .. passed through to hts_readlist(); also selects the wording
 *               of the error message (--samples-file vs --samples-list)
 *  nsamples  .. passed to hts_readlist() to receive the number of names
 *
 *  Returns the array obtained from hts_readlist(). error() is called when
 *  no array was returned or when the array holds no names.
 */
static char **read_samples(char *fname, int is_file, int *nsamples)
{
    char **samples = hts_readlist(fname, is_file, nsamples);

    // Either condition on its own means there are no names to apply, so the
    // two tests are joined with ||. Testing the pointer first also avoids
    // reading *nsamples when no array was returned.
    // NOTE(review): assumes error() does not return - confirm.
    if ( !samples || !*nsamples )
        error("Error parsing the %s %s \"%s\"\n", is_file?"--samples-file":"--samples-list",is_file?"file":"list",fname);

    return samples;
}
215+
207216
static void read_header_file(char *fname, kstring_t *hdr)
208217
{
209218
kstring_t tmp = {0,0,0};
@@ -389,10 +398,7 @@ static void reheader_vcf_gz(args_t *args)
389398
int nsamples = 0;
390399
char **samples = NULL;
391400
if ( args->samples_fname )
392-
{
393-
samples = hts_readlines(args->samples_fname, &nsamples);
394-
if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
395-
}
401+
samples = read_samples(args->samples_fname, args->samples_is_file, &nsamples);
396402
if ( args->header_fname )
397403
{
398404
free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -452,10 +458,7 @@ static void reheader_vcf(args_t *args)
452458
int nsamples = 0;
453459
char **samples = NULL;
454460
if ( args->samples_fname )
455-
{
456-
samples = hts_readlines(args->samples_fname, &nsamples);
457-
if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
458-
}
461+
samples = read_samples(args->samples_fname, args->samples_is_file, &nsamples);
459462
if ( args->header_fname )
460463
{
461464
free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -563,10 +566,7 @@ static void reheader_bcf(args_t *args, int is_compressed)
563566
int i, nsamples = 0;
564567
char **samples = NULL;
565568
if ( args->samples_fname )
566-
{
567-
samples = hts_readlines(args->samples_fname, &nsamples);
568-
if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
569-
}
569+
samples = read_samples(args->samples_fname, args->samples_is_file, &nsamples);
570570
if ( args->header_fname )
571571
{
572572
free(htxt.s); htxt.s = NULL; htxt.l = htxt.m = 0;
@@ -664,9 +664,10 @@ static void usage(args_t *args)
664664
fprintf(stderr, " -f, --fai FILE Update sequences and their lengths from the .fai file\n");
665665
fprintf(stderr, " -h, --header FILE New header\n");
666666
fprintf(stderr, " -o, --output FILE Write output to a file [standard output]\n");
667-
fprintf(stderr, " -s, --samples FILE New sample names\n");
667+
fprintf(stderr, " -n, --samples-list LIST New sample names given as a comma-separated list\n");
668+
fprintf(stderr, " -N, --samples-file FILE New sample names in a file, see the man page for details\n");
668669
fprintf(stderr, " -T, --temp-prefix PATH Ignored; was template for temporary file name\n");
669-
fprintf(stderr, " --threads INT Use multithreading with <int> worker threads (BCF only) [0]\n");
670+
fprintf(stderr, " --threads INT Use multithreading with INT worker threads (BCF only) [0]\n");
670671
fprintf(stderr, " -v, --verbosity INT Verbosity level\n");
671672
fprintf(stderr, "\n");
672673
fprintf(stderr, "Example:\n");
@@ -695,11 +696,13 @@ int main_reheader(int argc, char *argv[])
695696
{"output",1,0,'o'},
696697
{"header",1,0,'h'},
697698
{"samples",1,0,'s'},
699+
{"samples-file",1,0,'N'},
700+
{"samples-list",1,0,'n'},
698701
{"threads",1,NULL,1},
699702
{"verbosity",required_argument,NULL,'v'},
700703
{0,0,0,0}
701704
};
702-
while ((c = getopt_long(argc, argv, "s:h:o:f:T:v:",loptions,NULL)) >= 0)
705+
while ((c = getopt_long(argc, argv, "s:h:o:f:T:v:N:n:",loptions,NULL)) >= 0)
703706
{
704707
switch (c)
705708
{
@@ -710,7 +713,9 @@ int main_reheader(int argc, char *argv[])
710713
case 'T': break; // unused - was temp file prefix
711714
case 'f': args->fai_fname = optarg; break;
712715
case 'o': args->output_fname = optarg; break;
713-
case 's': args->samples_fname = optarg; break;
716+
case 's': args->samples_fname = optarg; args->samples_is_file = 1; break;
717+
case 'N': args->samples_fname = optarg; args->samples_is_file = 1; break;
718+
case 'n': args->samples_fname = optarg; args->samples_is_file = 0; break;
714719
case 'h': args->header_fname = optarg; break;
715720
case '?': usage(args); break;
716721
default: error("Unknown argument: %s\n", optarg);

test/test.pl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,8 @@
889889
run_test(\&test_vcf_concat,$opts,in=>['concat.5.a','concat.5.b','concat.5.c'],out=>'concat.5.3.out',do_bcf=>1,args=>'-G -a -D');
890890
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.1.out',header=>'reheader.hdr');
891891
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.2.out',samples=>'reheader.samples');
892+
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.2.out',args=>'-N {PATH}/reheader.samples');
893+
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.2.out',args=>'-n AAA,BBB');
892894
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.2.out',samples=>'reheader.samples2');
893895
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.3.out',samples=>'reheader.samples3');
894896
run_test(\&test_vcf_reheader,$opts,in=>'reheader',out=>'reheader.4.out',samples=>'reheader.samples4');

0 commit comments

Comments
 (0)