@@ -1,132 +1,13 @@
import requests
- import os
-
- def model_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> dict:
-     url = f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
-     return {'fn': fn, 'url': url}
-
- all_models = {
-     'qwen2': {
-         'default': '1.5b',
-         'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
-         'variants': {
-             '7b': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
-                 }
-             },
-             '1.5b': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
-                 }
-             },
-             '0.5b': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
-                 }
-             },
-         }
-     },
-     'gemma': {
-         'default': '2b',
-         'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
-         'variants': {
-             '2b': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
-                 }
-             },
-         }
-     },
-     'llama3': {
-         'default': '8b',
-         'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-         'variants': {
-             '8b': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
-                 }
-             },
-         }
-     },
-     'minicpm': {
-         'default': '2b-sft',
-         'brief': 'MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP.',
-         'variants': {
-             '2b-sft': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
-                 }
-             },
-             '2b-dpo': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
-                 }
-             },
-         }
-     },
-     'qwen1.5': {
-         'default': 'moe',
-         'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
-         'variants': {
-             '1.8b': {
-                 'default': 'q8',
-                 'quantized': {
-                     'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
-                 }
-             },
-             'moe': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
-                 }
-             },
-         }
-     },
-     'qanything': {
-         'default': '7b',
-         'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
-         'variants': {
-             '7b': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
-                 }
-             },
-         }
-     },
-     'starling-lm': {
-         'default': '7b',
-         'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
-         'variants': {
-             '7b': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
-                 }
-             },
-         }
-     },
-     'yi-1': {
-         'default': '34b',
-         'brief': 'Yi (v1) is a high-performing, bilingual language model.',
-         'variants': {
-             '34b': {
-                 'default': 'q4_1',
-                 'quantized': {
-                     'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
-                 }
-             },
-         }
-     },
- }
+ import os, json
+
+ import binding
+
+ def get_model_url_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> str:
+     return f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
+
+ with open(os.path.join(binding.PATH_SCRIPTS, 'models.json')) as f:
+     all_models = json.load(f)

def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=60, fill='█', printEnd="\r", auto_nl=True):
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
@@ -166,6 +47,7 @@ def show_variants(info, default):
def show_model(m):
    info = all_models[m]
    print(f"**{m}**: {info['brief']}")
+     print(f"License: {info['license']}")
    show_variants(info['variants'], info['default'])
    print()

@@ -176,8 +58,12 @@ def parse_model_id(model_id: str):
    parts = model_id.split(':')
    model = all_models[parts[0]]
    variants = model['variants']
-     var = variants[parts[1]] if len(parts) >= 2 else variants['default']
-     return var['quantized'][var['default']]
+     var = variants[parts[1] if len(parts) >= 2 else model['default']]
+     r = var['quantized'][var['default']]
+     url = r['url'].split('/')
+     r['url'] = get_model_url_on_modelscope(*url)
+     r['fn'] = url[1]
+     return r

def get_model(model_id, storage_dir):
    if not os.path.isdir(storage_dir):
@@ -187,9 +73,13 @@ def get_model(model_id, storage_dir):
    info = parse_model_id(model_id)
    fn = os.path.join(storage_dir, info['fn'])
    if os.path.isfile(fn):
-         return fn
+         if os.path.getsize(fn) == info['size']:
+             return fn
+         else:
+             print(f"{fn} is incomplete, download again")

    assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"
+     assert os.path.getsize(fn) == info['size'], f"downloaded file size mismatch!"

    return fn

@@ -200,13 +90,13 @@ def find_index(l: list, x) -> int:
    return -1

def preprocess_args(args: list[str], storage_dir) -> list[str]:
-     i = find_index(args, '-m')
-     if i < 0:
-         i = find_index(args, '--model')
-     if i < 0:
-         return args
-     if args[i + 1].startswith(':'):
-         args[i + 1] = get_model(args[i + 1][1:], storage_dir)
+     candidates = ['-m', '--model', '--embedding_model', '--reranker_model']
+     for param in candidates:
+         i = find_index(args, param)
+         if i < 0: continue
+
+         if args[i + 1].startswith(':'):
+             args[i + 1] = get_model(args[i + 1][1:], storage_dir)

    return args

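Note: the `all_models` table itself moves into `scripts/models.json`, which this diff does not show. Its shape can be inferred from the new code: `parse_model_id` splits each quantized entry's `url` on `/` into a `(project, filename)` pair before expanding it into a full ModelScope URL, `get_model` verifies the file against a `size` field, and `show_model` prints a `license` field. A minimal sketch of one entry under those assumptions, reusing values from the deleted dict above (the `size` and `license` values are placeholders, not real data):

```python
# Hypothetical models.json entry. Only the key layout is implied by the diff;
# the 'size' and 'license' values below are placeholders.
example_entry = {
    "qwen2": {
        "default": "1.5b",
        "brief": "Qwen2 is a new series of large language models from Alibaba group.",
        "license": "<license text>",  # printed by show_model()
        "variants": {
            "1.5b": {
                "default": "q8",
                "quantized": {
                    "q8": {
                        # "proj/fn": parse_model_id() splits this and calls
                        # get_model_url_on_modelscope(proj, fn)
                        "url": "chatllm_quantized_qwen2/qwen2-1.5b.bin",
                        "size": 0  # placeholder: byte count checked by get_model()
                    }
                }
            }
        }
    }
}
```

One design consequence: with the URL stored as `"proj/fn"`, `r['fn'] = url[1]` derives the local file name from the same field, so entries no longer need a separate `fn` key.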
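For reference, a sketch of how the extended `:` syntax is meant to behave (the argument values and storage path are invented for illustration):

```python
# Hypothetical invocation: ':qwen2:1.5b' names a model and a variant; a bare
# ':qwen2' would fall back to the model's 'default' variant in parse_model_id().
args = preprocess_args(['-m', ':qwen2:1.5b'], storage_dir='./quantized')
# get_model() downloads qwen2-1.5b.bin on first use, re-downloading when the
# size check fails, and rewrites the argument in place, so the caller only
# ever sees a plain local path, e.g. ['-m', './quantized/qwen2-1.5b.bin'].
```

The same rewriting now applies to `--embedding_model` and `--reranker_model`, which is what the `candidates` loop adds over the old two-flag lookup.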