Skip to content

Commit b92a14a

Browse files
authored
llama : support InfiniAI Megrez 3b (ggml-org#10893)
* Support InfiniAI Megrez 3b
* Fix tokenizer_clean_spaces for megrez
1 parent 6f0c9e0 commit b92a14a

File tree

4 files changed

+25
-0
lines changed

4 files changed

+25
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
684684
if chkhsh == "ad851be1dba641f2e3711822f816db2c265f788b37c63b4e1aeacb9ee92de8eb":
685685
# ref: https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct
686686
res = "gigachat"
687+
if chkhsh == "d4c8f286ea6b520b3d495c4455483cfa2302c0cfcd4be05d781b6a8a0a7cdaf1":
688+
# ref: https://huggingface.co/Infinigence/Megrez-3B-Instruct
689+
res = "megrez"
687690

688691
if res is None:
689692
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class TOKENIZER_TYPE(IntEnum):
106106
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
107107
{"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
108108
{"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
109+
{"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
109110
]
110111

111112

src/llama.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,6 +1720,7 @@ enum llm_chat_template {
17201720
LLM_CHAT_TEMPLATE_RWKV_WORLD,
17211721
LLM_CHAT_TEMPLATE_GRANITE,
17221722
LLM_CHAT_TEMPLATE_GIGACHAT,
1723+
LLM_CHAT_TEMPLATE_MEGREZ,
17231724
LLM_CHAT_TEMPLATE_UNKNOWN,
17241725
};
17251726

@@ -1753,6 +1754,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
17531754
{ "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
17541755
{ "granite", LLM_CHAT_TEMPLATE_GRANITE },
17551756
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
1757+
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
17561758
};
17571759

17581760
static llm_arch llm_arch_from_string(const std::string & name) {
@@ -6703,6 +6705,9 @@ static void llm_load_vocab(
67036705
} else if (
67046706
tokenizer_pre == "minerva-7b") {
67056707
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
6708+
} else if (
6709+
tokenizer_pre == "megrez") {
6710+
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
67066711
} else {
67076712
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
67086713
}
@@ -22931,6 +22936,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2293122936
return LLM_CHAT_TEMPLATE_GRANITE;
2293222937
} else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
2293322938
return LLM_CHAT_TEMPLATE_GIGACHAT;
22939+
} else if (tmpl_contains("<|role_start|>")) {
22940+
return LLM_CHAT_TEMPLATE_MEGREZ;
2293422941
}
2293522942
return LLM_CHAT_TEMPLATE_UNKNOWN;
2293622943
}
@@ -23289,6 +23296,16 @@ static int32_t llama_chat_apply_template_internal(
2328923296
if (add_ass) {
2329023297
ss << "assistant<|role_sep|>";
2329123298
}
23299+
} else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
23300+
// Megrez template
23301+
for (auto message : chat) {
23302+
std::string role(message->role);
23303+
ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
23304+
}
23305+
23306+
if (add_ass) {
23307+
ss << "<|role_start|>assistant<|role_end|>";
23308+
}
2329223309
} else {
2329323310
// template not supported
2329423311
return -1;

tests/test-chat-template.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ int main(void) {
7777
"{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
7878
// ai-sage/GigaChat-20B-A3B-instruct
7979
"{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}",
80+
// Infinigence/Megrez-3B-Instruct
81+
u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"
8082
};
8183
std::vector<std::string> expected_output = {
8284
// teknium/OpenHermes-2.5-Mistral-7B
@@ -133,6 +135,8 @@ int main(void) {
133135
"[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant </s>[INST] Another question[/INST]",
134136
// ai-sage/GigaChat-20B-A3B-instruct
135137
"<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
138+
// Infinigence/Megrez-3B-Instruct
139+
"<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
136140
};
137141
std::vector<char> formatted_chat(1024);
138142
int32_t res;

0 commit comments

Comments (0)