Commit 5dad33e

Support fp8_e4m3 scale search (intel#11339)
* fp8e4m3 switch off
* fix style
1 parent e50c890

File tree

1 file changed: +7 -0 lines changed


python/llm/src/ipex_llm/transformers/convert.py

Lines changed: 7 additions & 0 deletions
@@ -290,6 +290,13 @@ def convert_gptq(module, awq=False, llm_awq=False, act_order=False):
 def use_scale_search(model_config, qtype):
     if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
         return True
+    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
+            model_config.model_type not in ["qwen2", "baichuan"]:
+        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
+                "instruct" in model_config._name_or_path.lower():
+            # Llama-3-instruct
+            return False
+        return True
     return False
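For context, the sketch below reproduces the patched gate as a self-contained script so the branch logic can be tried directly. It assumes the Hugging Face-style config fields used in the diff (`model_type`, `vocab_size`, `_name_or_path`); the `ggml_tensor_qtype` ids and the example model names are placeholders, not the library's real values.

from types import SimpleNamespace

# Placeholder ids; the real mapping lives in ipex_llm's ggml_tensor_qtype.
ggml_tensor_qtype = {"fp6": 26, "fp8_e4m3": 27}

def use_scale_search(model_config, qtype):
    # fp6: scale search for every model type except qwen2.
    if qtype == ggml_tensor_qtype["fp6"] and model_config.model_type not in ["qwen2"]:
        return True
    # fp8_e4m3: scale search except for qwen2, baichuan, and Llama-3-instruct,
    # the latter identified by its 128256-token vocab plus "instruct" in the name.
    elif qtype == ggml_tensor_qtype["fp8_e4m3"] and \
            model_config.model_type not in ["qwen2", "baichuan"]:
        if model_config.model_type == "llama" and model_config.vocab_size == 128256 and \
                "instruct" in model_config._name_or_path.lower():
            # Llama-3-instruct
            return False
        return True
    return False

# Mock configs mimicking transformers' PretrainedConfig attributes.
llama3_instruct = SimpleNamespace(model_type="llama", vocab_size=128256,
                                  _name_or_path="meta-llama/Meta-Llama-3-8B-Instruct")
mistral = SimpleNamespace(model_type="mistral", vocab_size=32000,
                          _name_or_path="mistralai/Mistral-7B-v0.1")

print(use_scale_search(llama3_instruct, ggml_tensor_qtype["fp8_e4m3"]))  # False
print(use_scale_search(mistral, ggml_tensor_qtype["fp8_e4m3"]))          # True

As the examples show, the commit turns scale search on for fp8_e4m3 broadly, while switching it off for the model families the diff lists explicitly.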
