Skip to content

The ph3 model decodes Chinese characters and displays garbled characters. #7505

@williamlzw

Description

@williamlzw
Image
using Microsoft.ML.Tokenizers;
using Microsoft.ML.GenAI.Phi;
using Microsoft.ML.GenAI.Core;
using Microsoft.Extensions.AI;


public class Program
{
    public async static void TestPhi3()
    {
        string device = "cuda";
        var weightFolder = @"G:\model\Phi-3-mini-128k-instruct";
        var model = Phi3ForCausalLM.FromPretrained(weightFolder, "config.json", layersOnTargetDevice: -1, quantizeToInt4: true, targetDevice: device);
        var modelPath = Path.Join(weightFolder, "tokenizer.model");
        var tokenizer = Phi3TokenizerHelper.FromPretrained(modelPath);
        var pipeline = new CausalLMPipeline<Tokenizer, Phi3ForCausalLM>(tokenizer, model, device);
        var client = new Phi3CausalLMChatClient(pipeline);
        var task = """
            你能讲一个有趣的笑话吗?
            """;
        List<ChatMessage> _chatHistory = new();
        _chatHistory.Add(new ChatMessage(ChatRole.System, "你是一个助手,用中文回答用户的问题"));
        _chatHistory.Add(new ChatMessage(ChatRole.User, task));
        var options = new ChatOptions
        {
            StopSequences = ["<|end_of_text|>"],//phi3
            AdditionalProperties = new() { { "max_length", 2048 } },
        };
        await foreach (var response in client.GetStreamingResponseAsync(_chatHistory, options))
        {
            Console.Write(response.Text);
        }

        Console.WriteLine();
        Console.WriteLine("End!");
    }

    public static void Main()
    {
        TestPhi3();
    }
}
Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    untriagedNew issue has not been triaged

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions