Commit c06972a

Author: Judd

update model downloader, etc.

1 parent: bcaf9a4

File tree: 8 files changed, +275 / -157 lines

docs/tool_calling.md
Lines changed: 2 additions & 2 deletions

@@ -7,7 +7,7 @@ Demos of tool calling for these models are provided:
 * [GLM-4](../scripts/tool_glm4.py)
 * [Mistral-Instruct-7B-v0.3](../scripts/tool_mistral.py)
 * [QWen v1.5 & v2](../scripts/tool_qwen.py)
-* [DeepSeek Coder v2](../scripts/tool_deepseekcoder.py) (Note: function calling is *officially* unsupported.)
+* [DeepSeek-Coder v2](../scripts/tool_deepseekcoder.py) (Note: function calling is *officially* unsupported.)
 
 ## Precondition
 
@@ -134,7 +134,7 @@ You > which city is hotter?
 A.I. > Jinan is hotter than Beijing. Jinan's temperature is 36°C and Beijing's temperature is 33°C.
 ```
 
-### DeepSeek Coder v2
+### DeepSeek-Coder v2
 
 ```
 python tool_deepseekcoder.py -i -m /path/to/deepseekcoder-v2-lite-model.bin

scripts/binding.py
Lines changed: 3 additions & 1 deletion

@@ -1,5 +1,7 @@
 import sys, os
 
 this_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
-PATH_BINDS = os.path.join(this_dir, '..', 'bindings')
+PATH_APP = os.path.abspath(os.path.join(this_dir, '..'))
+PATH_BINDS = os.path.join(PATH_APP, 'bindings')
+PATH_SCRIPTS = os.path.join(PATH_APP, 'scripts')
 sys.path.append(PATH_BINDS)
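binding.py now exports the app root (PATH_APP) and the scripts directory (PATH_SCRIPTS) alongside PATH_BINDS, so sibling scripts can locate data files relative to the repository layout instead of hard-coding relative paths; model_downloader.py below uses binding.PATH_SCRIPTS to find the new models.json.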

scripts/model_downloader.py
Lines changed: 28 additions & 138 deletions

@@ -1,132 +1,13 @@
 import requests
-import os
-
-def model_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> dict:
-    url = f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
-    return { 'fn': fn, 'url': url }
-
-all_models = {
-    'qwen2': {
-        'default': '1.5b',
-        'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
-        'variants': {
-            '7b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
-                }
-            },
-            '1.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
-                }
-            },
-            '0.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
-                }
-            },
-        }
-    },
-    'gemma': {
-        'default': '2b',
-        'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
-        'variants': {
-            '2b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
-                }
-            },
-        }
-    },
-    'llama3': {
-        'default': '8b',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '8b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'minicpm': {
-        'default': '2b-sft',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '2b-sft': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
-                }
-            },
-            '2b-dpo': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qwen1.5': {
-        'default': 'moe',
-        'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
-        'variants': {
-            '1.8b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
-                }
-            },
-            'moe': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qanything': {
-        'default': '7b',
-        'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'starling-lm': {
-        'default': '7b',
-        'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'yi-1': {
-        'default': '34b',
-        'brief': 'Yi (v1) is a high-performing, bilingual language model.',
-        'variants': {
-            '34b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
-                }
-            },
-        }
-    },
-}
+import os, json
+
+import binding
+
+def get_model_url_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> str:
+    return f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
+
+with open(os.path.join(binding.PATH_SCRIPTS, 'models.json')) as f:
+    all_models = json.load(f)
 
 def print_progress_bar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 60, fill = '█', printEnd = "\r", auto_nl = True):
     percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
@@ -166,6 +47,7 @@ def show_variants(info, default):
 def show_model(m):
     info = all_models[m]
     print(f"**{m}**: {info['brief']}")
+    print(f"License : {info['license']}")
     show_variants(info['variants'], info['default'])
     print()

@@ -176,8 +58,12 @@ def parse_model_id(model_id: str):
     parts = model_id.split(':')
     model = all_models[parts[0]]
     variants = model['variants']
-    var = variants[parts[1]] if len(parts) >= 2 else variants['default']
-    return var['quantized'][var['default']]
+    var = variants[parts[1] if len(parts) >= 2 else model['default']]
+    r = var['quantized'][var['default']]
+    url = r['url'].split('/')
+    r['url'] = get_model_url_on_modelscope(*url)
+    r['fn'] = url[1]
+    return r
 
 def get_model(model_id, storage_dir):
     if not os.path.isdir(storage_dir):
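Besides widening the lookup, this rewrite fixes a bug: the old code fell back to variants['default'], but the default variant name lives at the model level (model['default']), so ids without an explicit variant would raise a KeyError. The new code also expands the compact 'project/filename' URL from models.json into a full ModelScope download URL. A rough trace, assuming the qwen2 entry sketched above:

```python
# Hypothetical trace of parse_model_id('qwen2'), assuming the entry above.
r = {'url': 'chatllm_quantized_qwen2/qwen2-1.5b.bin', 'size': 1234567890}
proj, fn = r['url'].split('/')   # 'chatllm_quantized_qwen2', 'qwen2-1.5b.bin'
r['url'] = get_model_url_on_modelscope(proj, fn)
# -> 'https://modelscope.cn/api/v1/models/judd2024/chatllm_quantized_qwen2/repo?Revision=master&FilePath=qwen2-1.5b.bin'
r['fn'] = fn                     # local filename to save under
```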
@@ -187,9 +73,13 @@ def get_model(model_id, storage_dir):
     info = parse_model_id(model_id)
     fn = os.path.join(storage_dir, info['fn'])
     if os.path.isfile(fn):
-        return fn
+        if os.path.getsize(fn) == info['size']:
+            return fn
+        else:
+            print(f"{fn} is incomplete, download again")
 
     assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"
+    assert os.path.getsize(fn) == info['size'], f"downloaded file size mismatch!"
 
     return fn
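The size field doubles as a crude integrity check: a file already on disk is re-downloaded if its size does not match the catalogue, and a mismatch after download aborts instead of handing a truncated model to the runtime.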

@@ -200,13 +90,13 @@ def find_index(l: list, x) -> int:
         return -1
 
 def preprocess_args(args: list[str], storage_dir) -> list[str]:
-    i = find_index(args, '-m')
-    if i < 0:
-        i = find_index(args, '--model')
-    if i < 0:
-        return args
-    if args[i + 1].startswith(':'):
-        args[i + 1] = get_model(args[i + 1][1:], storage_dir)
+    candidates = ['-m', '--model', '--embedding_model', '--reranker_model']
+    for param in candidates:
+        i = find_index(args, param)
+        if i < 0: continue
+
+        if args[i + 1].startswith(':'):
+            args[i + 1] = get_model(args[i + 1][1:], storage_dir)
 
     return args
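preprocess_args now resolves ':'-prefixed model ids on any of the four flags in candidates, so embedding and reranker models are auto-downloaded the same way as the chat model. A hypothetical invocation (the storage directory name and model id are assumptions for illustration):

```python
# Hypothetical usage: resolve ':'-prefixed ids to local files before launching.
from model_downloader import preprocess_args

args = ['-i', '-m', ':qwen2:1.5b', '--embedding_model', '/already/local.bin']
args = preprocess_args(args, 'quantized')
# args[2] now holds a local path such as 'quantized/qwen2-1.5b.bin';
# the plain path after --embedding_model is left untouched.
```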
