@@ -11,7 +11,7 @@
 from rich.progress import track
 import re
 import logmuse
-from ubiquerg import expandpath, is_command_callable
+from ubiquerg import expandpath
 from typing import List, Union, Dict, Tuple, NoReturn
 import peppy
 import pandas as pd
@@ -59,6 +59,7 @@
     _filter_gsm,
     _unify_list_keys,
     gse_content_to_dict,
+    is_prefetch_callable,
 )

 _LOGGER = logging.getLogger(__name__)
@@ -371,10 +372,10 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje

         # check to make sure prefetch is callable
         if not self.just_metadata and not self.processed:
-            if not is_command_callable("prefetch"):
+            if not is_prefetch_callable():
                 raise SystemExit(
-                    "To download raw data You must first install the sratoolkit, with prefetch in your PATH."
-                    " Installation instruction: http://geofetch.databio.org/en/latest/install/"
+                    "To download raw data, you must first install the sratoolkit, with prefetch in your PATH. "
+                    "Installation instruction: http://geofetch.databio.org/en/latest/install/"
                 )

         acc_GSE_list = parse_accessions(
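The `is_prefetch_callable` helper that replaces the generic `is_command_callable("prefetch")` check is imported from the package's utils above, but its body is not part of this diff. A minimal sketch of what such a check might look like, assuming it only needs to confirm that the sratoolkit `prefetch` binary can be invoked (the actual implementation may differ):

import subprocess

def is_prefetch_callable() -> bool:
    # Hypothetical sketch: run `prefetch --version` and report whether the
    # binary is on PATH and exits cleanly.
    try:
        result = subprocess.run(
            ["prefetch", "--version"],
            capture_output=True,
            check=False,
        )
        return result.returncode == 0
    except (FileNotFoundError, OSError):
        return False

A dedicated check like this can confirm not only that a prefetch executable exists but that it actually starts, which a simple PATH lookup alone would not.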
@@ -546,9 +547,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     name=self.project_name,
                     meta_processed_samples=processed_metadata_samples,
                     meta_processed_series=processed_metadata_series,
-                    gse_meta_dict=file_gse_content_dict
-                    if len(acc_GSE_list.keys()) == 1
-                    else None,
+                    gse_meta_dict=(
+                        file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None
+                    ),
                 )
                 if self.just_object:
                     return return_value
@@ -559,9 +560,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     f"{self.project_name}_PEP",
                     metadata_dict_combined,
                     subannotation_dict_combined,
-                    gse_meta_dict=file_gse_content_dict
-                    if len(acc_GSE_list.keys()) == 1
-                    else None,
+                    gse_meta_dict=(
+                        file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None
+                    ),
                 )
                 if self.just_object:
                     return return_value
@@ -1036,7 +1037,7 @@ def _write_processed_annotation(
         )

         if not just_object:
-            with open(file_annotation_path, "w") as m_file:
+            with open(file_annotation_path, "w", encoding="utf-8") as m_file:
                 dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys())
                 dict_writer.writeheader()
                 dict_writer.writerows(processed_metadata)
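Not part of this diff, but relevant to the open() call above: the csv module documentation recommends opening files handed to csv.DictWriter with newline="" so the writer does not emit extra blank lines on Windows. A small self-contained example combining both arguments (file name and sample data are hypothetical):

import csv

processed_metadata = [{"sample_name": "GSM0000001", "file": "counts.csv.gz"}]
# newline="" lets the csv module control line endings itself, avoiding the
# doubled \r\n (and resulting blank rows) that can appear on Windows.
with open("annotation.csv", "w", encoding="utf-8", newline="") as m_file:
    dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys())
    dict_writer.writeheader()
    dict_writer.writerows(processed_metadata)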
@@ -1789,15 +1790,22 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool:
                 return True

             except IOError as e:
-                _LOGGER.error(str(e))
-                # The server times out if we are hitting it too frequently,
-                # so we should sleep a bit to reduce frequency
-                sleeptime = (ntry + 1) ** 3
-                _LOGGER.info(f"Sleeping for {sleeptime} seconds")
-                time.sleep(sleeptime)
-                ntry += 1
-                if ntry > 4:
-                    raise e
+                if os.name == "nt":
+                    _LOGGER.error(f"{e}")
+                    raise OSError(
+                        "Windows may not have wget command. "
+                        "Check if `wget` command is installed correctly."
+                    )
+                else:
+                    _LOGGER.error(str(e))
+                    # The server times out if we are hitting it too frequently,
+                    # so we should sleep a bit to reduce frequency
+                    sleeptime = (ntry + 1) ** 3
+                    _LOGGER.info(f"Sleeping for {sleeptime} seconds")
+                    time.sleep(sleeptime)
+                    ntry += 1
+                    if ntry > 4:
+                        raise e

     def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
         """
@@ -1865,12 +1873,13 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
         else:
             # open existing annotation
             _LOGGER.info("Found SRA metadata, opening..")
-            with open(file_sra, "r") as m_file:
+            with open(file_sra, "r", encoding="UTF-8") as m_file:
                 reader = csv.reader(m_file)
                 file_list = []
                 srp_list = []
                 for k in reader:
-                    file_list.append(k)
+                    if k:
+                        file_list.append(k)
                 for value_list in file_list[1:]:
                     srp_list.append(dict(zip(file_list[0], value_list)))

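The guard added inside the reader loop skips blank rows, which csv.reader yields as empty lists, so that zipping the header row against each value row cannot produce malformed entries. The same pattern in isolation, with hypothetical function and file names:

import csv

def read_sra_annotation(path: str) -> list:
    # Hypothetical sketch: load an SRA annotation CSV as a list of dicts,
    # dropping blank rows before pairing header fields with values.
    with open(path, "r", encoding="UTF-8") as m_file:
        rows = [row for row in csv.reader(m_file) if row]
    header, value_rows = rows[0], rows[1:]
    return [dict(zip(header, value_list)) for value_list in value_rows]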