@@ -434,6 +434,7 @@ typedef struct _args_t
434
434
struct {
435
435
int unknown_chr ,unknown_tscript_biotype ,unknown_strand ,unknown_phase ,duplicate_id ;
436
436
int unknown_cds_phase ,incomplete_cds ,wrong_phase ,overlapping_cds ,ref_allele_mismatch ;
437
+ int faidx_fetch_failed ;
437
438
} warned ;
438
439
439
440
char * gencode_str ; // which genetic code table to use
@@ -2767,14 +2768,32 @@ void vbuf_flush(args_t *args, uint32_t pos)
2767
2768
args -> ncsq_buf = 0 ;
2768
2769
}
2769
2770
2770
- void tscript_init_ref (args_t * args , gf_tscript_t * tr , const char * chr )
2771
+ // returns 0 on success, -1 if the region could not be fetched from the fasta file (e.g. the sequence, such as chrM, is not present)
2772
+ int tscript_init_ref (args_t * args , gf_tscript_t * tr , const char * chr )
2771
2773
{
2772
2774
int i , len ;
2773
2775
int pad_beg = tr -> beg >= N_REF_PAD ? N_REF_PAD : tr -> beg ;
2774
2776
2777
+ // if forced to repeatedly faidx-fetch a non-existent chromosome, turn off hts verbosity, unless
2778
+ // args->verbosity is 2 or higher
2779
+ int verbose = hts_verbose ;
2780
+ if ( args -> warned .faidx_fetch_failed && args -> verbosity < 2 ) hts_verbose = 0 ;
2775
2781
TSCRIPT_AUX (tr )-> ref = faidx_fetch_seq (args -> fai , chr , tr -> beg - pad_beg , tr -> end + N_REF_PAD , & len );
2782
+ hts_verbose = verbose ;
2776
2783
if ( !TSCRIPT_AUX (tr )-> ref )
2777
- error ("faidx_fetch_seq failed %s:%d-%d\n" , chr ,tr -> beg + 1 ,tr -> end + 1 );
2784
+ {
2785
+ if ( !args -> force )
2786
+ error ("Error: unable to fetch the region of the fasta reference %s:%d-%d\n" , chr ,tr -> beg + 1 ,tr -> end + 1 );
2787
+
2788
+ else if ( args -> verbosity && (!args -> warned .faidx_fetch_failed || args -> verbosity > 1 ) )
2789
+ {
2790
+ fprintf (stderr ,"Warning: unable to fetch the region of the fasta reference %s:%d-%d\n" , chr ,tr -> beg + 1 ,tr -> end + 1 );
2791
+ if ( args -> verbosity < 2 )
2792
+ fprintf (stderr ," This message is printed only once, the verbosity can be increased with `--verbosity 2`\n" );
2793
+ }
2794
+ args -> warned .faidx_fetch_failed ++ ;
2795
+ return -1 ;
2796
+ }
2778
2797
2779
2798
int pad_end = len - (tr -> end - tr -> beg + 1 + pad_beg );
2780
2799
if ( pad_beg + pad_end != 2 * N_REF_PAD )
@@ -2788,6 +2807,7 @@ void tscript_init_ref(args_t *args, gf_tscript_t *tr, const char *chr)
2788
2807
free (TSCRIPT_AUX (tr )-> ref );
2789
2808
TSCRIPT_AUX (tr )-> ref = ref ;
2790
2809
}
2810
+ return 0 ;
2791
2811
}
2792
2812
2793
2813
// returns 0 on success, negative number on reference mismatch
@@ -2848,7 +2868,12 @@ int test_cds_local(args_t *args, bcf1_t *rec)
2848
2868
if ( !TSCRIPT_AUX (tr ) )
2849
2869
{
2850
2870
tr -> aux = calloc (sizeof (tscript_t ),1 );
2851
- tscript_init_ref (args , tr , chr_fai );
2871
+ if ( tscript_init_ref (args , tr , chr_fai ) )
2872
+ {
2873
+ free (tr -> aux );
2874
+ tr -> aux = NULL ;
2875
+ continue ;
2876
+ }
2852
2877
tscript_splice_ref (tr );
2853
2878
khp_insert (trhp , args -> active_tr , & tr ); // only to clean the reference afterwards
2854
2879
}
@@ -3042,7 +3067,12 @@ int test_cds(args_t *args, bcf1_t *rec, vbuf_t *vbuf)
3042
3067
{
3043
3068
// initialize the transcript and its haplotype tree, fetch the reference sequence
3044
3069
tr -> aux = calloc (sizeof (tscript_t ),1 );
3045
- tscript_init_ref (args , tr , chr_fai );
3070
+ if ( tscript_init_ref (args , tr , chr_fai ) )
3071
+ {
3072
+ free (tr -> aux );
3073
+ tr -> aux = NULL ;
3074
+ continue ;
3075
+ }
3046
3076
3047
3077
TSCRIPT_AUX (tr )-> root = (hap_node_t * ) calloc (1 ,sizeof (hap_node_t ));
3048
3078
TSCRIPT_AUX (tr )-> nhap = args -> phase == PHASE_DROP_GT ? 1 : 2 * args -> smpl -> n ; // maximum ploidy = diploid
0 commit comments