Skip to content

Commit 485dc01

Browse files
authored
server : add system_fingerprint to chat/completion (ggml-org#10917)
* server : add system_fingerprint to chat/completion
* update README
1 parent 86bf31c commit 485dc01

File tree

4 files changed

+25
-15
lines changed

4 files changed

+25
-15
lines changed

examples/server/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
724724
},
725725
"total_slots": 1,
726726
"model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
727-
"chat_template": "..."
727+
"chat_template": "...",
728+
"build_info": "b(build number)-(build commit hash)"
728729
}
729730
```
730731

examples/server/server.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result {
595595
std::time_t t = std::time(0);
596596

597597
json res = json {
598-
{"choices", json::array({choice})},
599-
{"created", t},
600-
{"model", oaicompat_model},
601-
{"object", "chat.completion"},
598+
{"choices", json::array({choice})},
599+
{"created", t},
600+
{"model", oaicompat_model},
601+
{"system_fingerprint", build_info},
602+
{"object", "chat.completion"},
602603
{"usage", json {
603604
{"completion_tokens", n_decoded},
604605
{"prompt_tokens", n_prompt_tokens},
@@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result {
632633
};
633634

634635
json ret = json {
635-
{"choices", json::array({choice})},
636-
{"created", t},
637-
{"id", oaicompat_cmpl_id},
638-
{"model", oaicompat_model},
639-
{"object", "chat.completion.chunk"},
636+
{"choices", json::array({choice})},
637+
{"created", t},
638+
{"id", oaicompat_cmpl_id},
639+
{"model", oaicompat_model},
640+
{"system_fingerprint", build_info},
641+
{"object", "chat.completion.chunk"},
640642
{"usage", json {
641643
{"completion_tokens", n_decoded},
642644
{"prompt_tokens", n_prompt_tokens},
@@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result {
761763
}
762764

763765
json ret = json {
764-
{"choices", choices},
765-
{"created", t},
766-
{"id", oaicompat_cmpl_id},
767-
{"model", oaicompat_model},
768-
{"object", "chat.completion.chunk"}
766+
{"choices", choices},
767+
{"created", t},
768+
{"id", oaicompat_cmpl_id},
769+
{"model", oaicompat_model},
770+
{"system_fingerprint", build_info},
771+
{"object", "chat.completion.chunk"}
769772
};
770773

771774
if (timings.prompt_n >= 0) {
@@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
34763479
{ "total_slots", ctx_server.params_base.n_parallel },
34773480
{ "model_path", ctx_server.params_base.model },
34783481
{ "chat_template", llama_get_chat_template(ctx_server.model) },
3482+
{ "build_info", build_info },
34793483
};
34803484

34813485
res_ok(res, data);

examples/server/tests/unit/test_chat_completion.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
3131
})
3232
assert res.status_code == 200
3333
assert "cmpl" in res.body["id"] # make sure the completion id has the expected format
34+
assert res.body["system_fingerprint"].startswith("b")
3435
assert res.body["model"] == model if model is not None else server.model_alias
3536
assert res.body["usage"]["prompt_tokens"] == n_prompt
3637
assert res.body["usage"]["completion_tokens"] == n_predicted
@@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
6364
last_cmpl_id = None
6465
for data in res:
6566
choice = data["choices"][0]
67+
assert data["system_fingerprint"].startswith("b")
6668
assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
6769
if last_cmpl_id is None:
6870
last_cmpl_id = data["id"]
@@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
9294
seed=42,
9395
temperature=0.8,
9496
)
97+
assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
9598
assert res.choices[0].finish_reason == "length"
9699
assert res.choices[0].message.content is not None
97100
assert match_regex("(Suddenly)+", res.choices[0].message.content)

examples/server/utils.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul
5656
}
5757
}
5858

59+
const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
60+
5961
//
6062
// tokenizer and input processing utils
6163
//

0 commit comments

Comments (0)