@@ -10,7 +10,9 @@
 from GetSearchResult import spiderpub
 from PDFHelper import PDFHelper
 from WebHelper import WebHelper
-from config import ProjectInfo, feedbacktime, pdfSavePath
+from config import ProjectInfo, projConfig
+
+feedbacktime = projConfig.feedbacktime
 
 
 def printSpliter(length=25):
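
The import change above replaces the loose module-level constants (feedbacktime, pdfSavePath) with a single projConfig object. config.py itself is not part of this diff; below is a minimal, hypothetical sketch of the shape main.py now assumes, with attribute names taken from the diff and values invented:

    # hypothetical sketch of config.py after this change -- not the real file
    class _ProjConfig:
        feedbacktime = 2                  # seconds to pause so users can read output (value invented)
        pdfSavePath = './document/pub'    # default pdf save directory (matches the diff)

    projConfig = _ProjConfig()

ProjectInfo is still imported separately, so only the two constants appear to have moved under projConfig.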
@@ -22,9 +24,9 @@ def printSpliter(length=25):
     # parse the command-line arguments
     parser = argparse.ArgumentParser(
         description="pubmedsoso is a python program for crawling article information and downloading pdf files",
-        usage="python main.py keyword ")
+        usage="python main.py keyword")
 
-    parser.add_argument('--version', '-v', action='version',
+    parser.add_argument('-v', '--version', action='version',
                         version=f'\nCurrent {ProjectInfo.ProjectName}\n\nversion: {ProjectInfo.VersionInfo}\n' +
                                 f'Last updated date: {ProjectInfo.LastUpdate}\n' +
                                 f'Author: {ProjectInfo.AuthorName}\n',
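
The reordering above puts the short option first, the conventional argparse style. For reference, argparse's built-in 'version' action prints the version string and exits immediately; a standalone illustration (prog name and version text invented):

    import argparse

    p = argparse.ArgumentParser(prog="main.py")
    # action='version' makes the flag print the string below and exit
    p.add_argument('-v', '--version', action='version', version='pubmedsoso (version string here)')
    p.parse_args(['-v'])   # prints the version text and raises SystemExit(0)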
@@ -40,33 +42,48 @@ def printSpliter(length=25):
     parser.add_argument("keyword", type=str,
                         help='specify the keywords to search pubmed\nFor example "headache"')
 
-    parser.add_argument("--page_num", "-n", type=int,
-                        help='add --number or -n to specify the page number you wanna to crawl'
-                             'For example --number 10. Default number is 10',
+    parser.add_argument("-n", "--pagenum", type=int, metavar='',
+                        help='add --pagenum or -n to specify the number of pages of info you want to crawl. '
+                             'For example --pagenum 10. Default number is 10',
                         default=10)
 
-    parser.add_argument("--year", "-y", type=int,
+    parser.add_argument("-y", "--year", type=int, metavar='',
                         help='add --year or -y to specify the year range you want to search. '
                              'For example --year 10. The default is not set',
                         default=None)
 
-    parser.add_argument("--download_num", "-d", type=int,
-                        help='add --download_num or -d to specify the doc number you wanna to download'
+    parser.add_argument("-d", "--downloadnum", type=int, metavar='',
+                        help='add --downloadnum or -d to specify the number of pdf files you want to download. '
                              'For example -d 10. Default number is 10',
                         default=10)
+
+    parser.add_argument("-D", "--directory", type=str, metavar='',
+                        help='add --directory or -D to specify the save path for pdf files. '
+                             'For example, -D ./output. Default path is ./document/pub. '
+                             'You can override the default path in config.py',
+                        default='./document/pub')
     ####################################################################################################
 
     args = parser.parse_args()
 
+    # print the hello info
+    ProjectInfo.printProjectInfo()
+    print("\n")
+
+    # check the directory argument; the path given on the CLI takes precedence.
+    # the default pdf save directory comes from config.py and is './document/pub'
+    if args.directory is not None:
+        projConfig.pdfSavePath = args.directory
+
     if args.keyword.isspace() or args.keyword.isnumeric():
         print("pubmedsoso search keyword error\n")
         sleep(feedbacktime)
 
-    print("\n欢迎使用Pubmedsoso 文件检索工具\n\n")
 
-    print(f"当前使用的命令行参数 {args.__dict__}\n")
+
+    print(f"Current command-line parameters: {args.__dict__}\n")
     print(
-        f"当前使用的命令行参数 搜索关键词: \"{args.keyword}\", 文献信息检索数量: {args.page_num}, 年份:{args.year}, 文献下载数量:{args.download_num}\n")
+        f'Current command-line parameters -- keyword: "{args.keyword}", records to crawl: {args.pagenum}, year: {args.year}, pdf files to download: {args.downloadnum}, pdf save directory: {projConfig.pdfSavePath}\n')
     try:
         result_num = WebHelper.GetSearchResultNum(args.keyword)
     except Exception as err:
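
Taken together, the flags defined above parse as follows; a self-contained check with the argument names copied from the diff and an invented sample command line:

    import argparse

    parser = argparse.ArgumentParser(usage="python main.py keyword")
    parser.add_argument("keyword", type=str)
    parser.add_argument("-n", "--pagenum", type=int, metavar='', default=10)
    parser.add_argument("-y", "--year", type=int, metavar='', default=None)
    parser.add_argument("-d", "--downloadnum", type=int, metavar='', default=10)
    parser.add_argument("-D", "--directory", type=str, metavar='', default='./document/pub')

    args = parser.parse_args(["headache", "-n", "5", "-d", "3"])
    # -> Namespace(keyword='headache', pagenum=5, year=None, downloadnum=3, directory='./document/pub')

Two details worth noting: metavar='' suppresses the uppercase placeholder after each flag in the help output, keeping it compact; and because --directory has a non-None default, the `if args.directory is not None` guard above is always true, so the CLI value (or its default) always overwrites whatever config.py set.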
@@ -90,14 +107,14 @@ def printSpliter(length=25):
     printSpliter()
     sleep(0.5)
 
-    if os.path.exists(pdfSavePath):
+    if os.path.exists(projConfig.pdfSavePath):
         print("The save directory check passed; files can be stored\n")
     else:
-        os.makedirs(pdfSavePath)
-        print(f"成功在当前目录下建立 {pdfSavePath} 文件夹\n")
+        os.makedirs(projConfig.pdfSavePath)
+        print(f"Created the {projConfig.pdfSavePath} folder under the current directory\n")
 
     printSpliter()
-    print(f"{pdfSavePath} 目录检查完成,开始执行主程序\n")
+    print(f"{projConfig.pdfSavePath} directory check finished; starting the main program\n")
 
     sleep(feedbacktime)
 
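
The exists/makedirs branch above works, but the check and the creation can be collapsed into one race-free call; a possible simplification, not part of this diff:

    import os

    pdf_save_path = './document/pub'   # stands in for projConfig.pdfSavePath
    # exist_ok=True creates the directory tree only when it is missing and avoids
    # the time-of-check/time-of-use race between os.path.exists() and os.makedirs()
    os.makedirs(pdf_save_path, exist_ok=True)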
@@ -112,7 +129,7 @@ def printSpliter(length=25):
 
     printSpliter()
 
-    spiderpub(encoded_param, args.page_num, result_num)
+    spiderpub(encoded_param, args.pagenum, result_num)
 
     printSpliter()
     print("\n\nFinished crawling the search results; starting per-article retrieval, which takes longer\n\n")
@@ -123,11 +140,11 @@ def printSpliter(length=25):
     print("\n\nFinished crawling the search results; starting the document downloads, which takes longer\n\n")
 
     # PDFHelper.PDFBatchDonwload(args.download_num)
-    PDFHelper.PDFBatchDownloadEntry(args.download_num)
+    PDFHelper.PDFBatchDownloadEntry(args.downloadnum)
 
     ExcelHelper.PD_To_excel(dbpath, override=True)
     print("The final crawl results have been saved automatically to an excel sheet named %s" % ExcelHelper.tablename)
-    print(f"爬取的所有文献已经保存到{pdfSavePath}目录下")
+    print(f"All downloaded documents have been saved under the {projConfig.pdfSavePath} directory")
     print("The crawler has finished and will exit automatically. Haha, no errors no warning")
 
     printSpliter()