llama-model : add dots.llm1 architecture support (#14044) #14118
base: master
gguf-py/gguf/constants.py
@@ -343,6 +343,7 @@ class MODEL_ARCH(IntEnum):
     WAVTOKENIZER_DEC = auto()
     PLM = auto()
     BAILINGMOE = auto()
+    DOTS1 = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):

@@ -623,6 +624,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM: "plm",
     MODEL_ARCH.BAILINGMOE: "bailingmoe",
+    MODEL_ARCH.DOTS1: "dots1"
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2044,6 +2046,31 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     # TODO
 }

@@ -2099,6 +2126,10 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.BAILINGMOE: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }

 #

Review comment on lines +2129 to +2132: I think this is not necessary.
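As a quick way to see what the hunks above register (not part of the PR itself), the new entries can be read back through gguf-py's existing lookup tables. This is a minimal sketch that assumes the branch's gguf-py package is importable (e.g. via PYTHONPATH=gguf-py):

```python
# Minimal sketch: inspect the DOTS1 registrations added above.
# Assumes gguf-py from this branch is on the import path.
import gguf

print(gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.DOTS1])   # -> "dots1"

# TENSOR_NAMES maps each MODEL_TENSOR to its canonical GGUF name pattern.
for t in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.DOTS1][:4]:
    print(gguf.TENSOR_NAMES[t].format(bid=0))          # token_embd, output_norm, output, blk.0.attn_norm
```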
src/llama-chat.cpp
@@ -183,6 +183,11 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_BAILING;
     } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
         return LLM_CHAT_TEMPLATE_LLAMA4;
+    } else if (tmpl_contains("<|userprompt|>") &&
+               tmpl_contains("<|endofuserprompt|>") &&
+               tmpl_contains("<|response|>") &&
+               tmpl_contains("<|endofresponse|>")) {
+        return LLM_CHAT_TEMPLATE_DOTS1;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }

Review comment on lines +186 to +190 (with a suggested change): we don't need to check exhaustively, since this is the only model which uses this template.
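For illustration, the detection rule (and the reviewer's point that a small subset of markers is already unique) can be mirrored in Python; this is a sketch only, and the authoritative check is the C++ above:

```python
# Sketch: the dots.llm1 chat template is identified by its role markers.
# Per the review comment, checking one or two markers is enough, since no other
# supported template uses them.
def is_dots1_template(tmpl: str) -> bool:
    return "<|userprompt|>" in tmpl and "<|endofuserprompt|>" in tmpl

print(is_dots1_template("<|userprompt|>{{ content }}<|endofuserprompt|><|response|>"))  # True
print(is_dots1_template("<|im_start|>user<|im_end|>"))                                  # False
```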
@@ -643,6 +648,21 @@
         if (add_ass) {
             ss << "Assistant:";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
+        // dots.llm1.inst (DOTS1)
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|system|>" << message->content << "<|endofsystem|>";
+            } else if (role == "user") {
+                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
+            } else if (role == "assistant") {
+                ss << "<|response|>" << message->content << "<|endofresponse|>";
+            }
+        }
+        if (add_ass) {
+            ss << "<|response|>";
+        }
     } else {
         // template not supported
         return -1;

Review comment on the assistant branch (with a suggested change): to make it the same as the other code blocks.
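To see the concrete prompt this branch produces, here is a small Python mirror of the loop above (illustrative only; the real formatting is the C++ in this hunk):

```python
# Python mirror of the DOTS1 branch of llm_chat_apply_template, for illustration only.
def render_dots1(messages, add_assistant_prompt=True):
    tags = {
        "system":    ("<|system|>",     "<|endofsystem|>"),
        "user":      ("<|userprompt|>", "<|endofuserprompt|>"),
        "assistant": ("<|response|>",   "<|endofresponse|>"),
    }
    out = []
    for m in messages:
        start, end = tags[m["role"]]
        out.append(f"{start}{m['content']}{end}")
    if add_assistant_prompt:           # mirrors the add_ass case
        out.append("<|response|>")
    return "".join(out)

print(render_dots1([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user",   "content": "Hello!"},
]))
# <|system|>You are a helpful assistant.<|endofsystem|><|userprompt|>Hello!<|endofuserprompt|><|response|>
```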
Review comment: there is a simpler way to write the conversion code: https://github.com/ngxson/llama.cpp/blob/b469d9b86e148c4d7538ad27f817cf83bc2fb339/convert_hf_to_gguf.py#L3070-L3087
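For context on the linked suggestion: in convert_hf_to_gguf.py, new architectures are usually added by subclassing an existing converter and overriding only what differs, rather than writing the tensor handling from scratch. A rough sketch of that pattern follows; the class and registration names are assumptions for illustration, not the code the reviewer linked:

```python
# Hypothetical sketch of the "reuse an existing converter" pattern inside
# convert_hf_to_gguf.py; names below are assumed for illustration.
@ModelBase.register("Dots1ForCausalLM")      # assumed HF architecture string
class Dots1Model(Qwen2MoeModel):             # reuse an existing MoE converter's tensor handling
    model_arch = gguf.MODEL_ARCH.DOTS1       # point it at the new architecture enum
```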