Skip to content

Commit 2563cad

Browse files
committed
Allow non-existent chromosomes in fasta file when --force is used (such as chrMT). Update documentation
1 parent 2d811c5 commit 2563cad

File tree

3 files changed

+292
-244
lines changed

3 files changed

+292
-244
lines changed

csq.c

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ typedef struct _args_t
434434
struct {
435435
int unknown_chr,unknown_tscript_biotype,unknown_strand,unknown_phase,duplicate_id;
436436
int unknown_cds_phase,incomplete_cds,wrong_phase,overlapping_cds,ref_allele_mismatch;
437+
int faidx_fetch_failed;
437438
} warned;
438439

439440
char *gencode_str; // which genetic code table to use
@@ -2767,14 +2768,32 @@ void vbuf_flush(args_t *args, uint32_t pos)
27672768
args->ncsq_buf = 0;
27682769
}
27692770

2770-
void tscript_init_ref(args_t *args, gf_tscript_t *tr, const char *chr)
2771+
// returns 0 on success, -1 if sequence is not present in the fasta file (eg chrM)
2772+
int tscript_init_ref(args_t *args, gf_tscript_t *tr, const char *chr)
27712773
{
27722774
int i, len;
27732775
int pad_beg = tr->beg >= N_REF_PAD ? N_REF_PAD : tr->beg;
27742776

2777+
// if forced to repeatedly faidx-fetch a non-existent chromosome, turn off hts verbosity, unless
2778+
// explicitly asked not to
2779+
int verbose = hts_verbose;
2780+
if ( args->warned.faidx_fetch_failed && args->verbosity < 2 ) hts_verbose = 0;
27752781
TSCRIPT_AUX(tr)->ref = faidx_fetch_seq(args->fai, chr, tr->beg - pad_beg, tr->end + N_REF_PAD, &len);
2782+
hts_verbose = verbose;
27762783
if ( !TSCRIPT_AUX(tr)->ref )
2777-
error("faidx_fetch_seq failed %s:%d-%d\n", chr,tr->beg+1,tr->end+1);
2784+
{
2785+
if ( !args->force )
2786+
error("Error: unable to fetch the region of the fasta reference %s:%d-%d\n", chr,tr->beg+1,tr->end+1);
2787+
2788+
else if ( args->verbosity && (!args->warned.faidx_fetch_failed || args->verbosity > 1) )
2789+
{
2790+
fprintf(stderr,"Warning: unable to fetch the region of the fasta reference %s:%d-%d\n", chr,tr->beg+1,tr->end+1);
2791+
if ( args->verbosity < 2 )
2792+
fprintf(stderr," This message is printed only once, the verbosity can be increased with `--verbosity 2`\n");
2793+
}
2794+
args->warned.faidx_fetch_failed++;
2795+
return -1;
2796+
}
27782797

27792798
int pad_end = len - (tr->end - tr->beg + 1 + pad_beg);
27802799
if ( pad_beg + pad_end != 2*N_REF_PAD )
@@ -2788,6 +2807,7 @@ void tscript_init_ref(args_t *args, gf_tscript_t *tr, const char *chr)
27882807
free(TSCRIPT_AUX(tr)->ref);
27892808
TSCRIPT_AUX(tr)->ref = ref;
27902809
}
2810+
return 0;
27912811
}
27922812

27932813
// returns 0 on success, negative number on reference mismatch
@@ -2848,7 +2868,12 @@ int test_cds_local(args_t *args, bcf1_t *rec)
28482868
if ( !TSCRIPT_AUX(tr) )
28492869
{
28502870
tr->aux = calloc(sizeof(tscript_t),1);
2851-
tscript_init_ref(args, tr, chr_fai);
2871+
if ( tscript_init_ref(args, tr, chr_fai) )
2872+
{
2873+
free(tr->aux);
2874+
tr->aux = NULL;
2875+
continue;
2876+
}
28522877
tscript_splice_ref(tr);
28532878
khp_insert(trhp, args->active_tr, &tr); // only to clean the reference afterwards
28542879
}
@@ -3042,7 +3067,12 @@ int test_cds(args_t *args, bcf1_t *rec, vbuf_t *vbuf)
30423067
{
30433068
// initialize the transcript and its haplotype tree, fetch the reference sequence
30443069
tr->aux = calloc(sizeof(tscript_t),1);
3045-
tscript_init_ref(args, tr, chr_fai);
3070+
if ( tscript_init_ref(args, tr, chr_fai) )
3071+
{
3072+
free(tr->aux);
3073+
tr->aux = NULL;
3074+
continue;
3075+
}
30463076

30473077
TSCRIPT_AUX(tr)->root = (hap_node_t*) calloc(1,sizeof(hap_node_t));
30483078
TSCRIPT_AUX(tr)->nhap = args->phase==PHASE_DROP_GT ? 1 : 2*args->smpl->n; // maximum ploidy = diploid

0 commit comments

Comments
 (0)