C#中使用Whisper实现语音输入

原创已于 2026-04-10 11:58:08 修改 · 356 阅读

7 ·

CC 4.0 BY-SA版权

文章标签：

#c# #whisper #开发语言

于 2026-04-10 11:56:00 首次发布

C# 专栏收录该内容

8 篇文章

订阅专栏

接上篇说的Whisper实时语音输入，这篇文章主要介绍语音输入文本的做法。

一、思路

思路：录制音频文件->检测长时间无语音输入时则自动停止录制->识别音频文件并输出。
其实就是将识别本地音频文件替换为录制音频，本质上还是识别的音频文件内容。

二、NuGet包下载

要实现音频录制，需要下载NAudio程序包。
右键项目->“管理NuGet程序包”->搜索并安装NAudio
在这里插入图片描述

三、声音阈值设置

判断静音通常不是直接判断是否有声音，日常环境中很难做到完全静音，所以需要设置一个声音阈值silenceThreshold，低于该阈值则判断为静音状态。
但是麦克风采集到的PCM采样值是线性幅度（amplitude），不是dB，所以需要换算一下。换算公式为dB = 20 * log10(amplitude)；-20 dB 对应的线性幅度为：

-20 = 20 * log10(x)
x = 10^(-20/20)
x = 10^-1 = 0.1

因为PCM音频里的规则是：0 dBFS = 最大音量，对应归一化幅度（amplitude）1.0；
所有正常音频的电平都满足 dB ≤ 0，归一化后的采样值范围是 [-1, 1]，所以实际上：

分贝	实际 amplitude
0 dB	1.0
-20 dB	0.1

也就是说，-20 dB（dBFS）的静音阈值应该用silenceThreshold = 0.1f;。
（注：以上分贝计算来源于网络）

四、实现代码

总共也就不到三百行的代码，其中一半还是注释，这里也就老规矩，全贴了。


    /// <summary>
    /// Test form that records microphone audio with NAudio, stops automatically after a
    /// period of silence (or when the stop button is clicked), and then runs whisper-cli
    /// on the recorded WAV file, showing the recognized text in <c>rtLabelBlack1</c>.
    /// </summary>
    public partial class FormTestUI : Form
    {
        /// <summary>
        /// Upper bound (ms) on how long we wait for NAudio to raise RecordingStopped
        /// before releasing the recorder ourselves.
        /// </summary>
        private const int StopTimeoutMs = 2000;

        private string whisperExe = Path.Combine(
           Application.StartupPath,
           "Whisper",
           "whisper-cli.exe"
       );

        private string modelPath = Path.Combine(
            Application.StartupPath,
            "Whisper",
            "ggml-small.bin"
        );

        /// <summary>
        /// Temporary WAV file the microphone audio is recorded into.
        /// </summary>
        private string tempWavPath = Path.Combine(Application.StartupPath, "temp_record.wav");

        /// <summary>
        /// Microphone capture device.
        /// </summary>
        private WaveInEvent waveSource;

        /// <summary>
        /// Writer for the temporary WAV file.
        /// </summary>
        private WaveFileWriter waveWriter;

        /// <summary>
        /// The whisper-cli process that is currently running, or null.
        /// </summary>
        private Process whisperProcess;

        /// <summary>
        /// True while audio is being captured. Read from the audio callback and the
        /// silence-detection loop as well as the UI thread, hence volatile.
        /// </summary>
        private volatile bool isRecording = false;

        /// <summary>
        /// True while a stop + recognition pass is in progress (UI thread only).
        /// Prevents a new recording from reopening the temp file while it is still in use.
        /// </summary>
        private bool isRecognizing = false;

        /// <summary>
        /// Peak amplitude (0..1) of the most recent audio buffer.
        /// </summary>
        private float currentVolume = 0f;

        /// <summary>
        /// Silence threshold as a normalized linear amplitude.
        /// 0.1f corresponds to -20 dBFS (20 * log10(0.1) = -20).
        /// </summary>
        private float silenceThreshold = 0.1f;

        /// <summary>
        /// How long (ms) the input must stay below the threshold before recording stops.
        /// </summary>
        private int silenceDuration = 1500;

        /// <summary>
        /// Measures the time elapsed since sound was last detected.
        /// Always accessed under <see cref="silenceLock"/> because Stopwatch is not thread-safe.
        /// </summary>
        private Stopwatch silenceWatch = new Stopwatch();

        /// <summary>
        /// Guards <see cref="silenceWatch"/>, which is touched by the audio thread and the
        /// silence-detection loop.
        /// </summary>
        private readonly object silenceLock = new object();

        /// <summary>
        /// Completed by the RecordingStopped handler once the recorder and the WAV writer
        /// have been released (or faulted with the capture error).
        /// </summary>
        private TaskCompletionSource<bool> recordingStopped;


        public FormTestUI()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Start button: verifies the whisper executable and model exist, then starts recording.
        /// </summary>
        private void VoiceStart_Click(object sender, EventArgs e)
        {
            // Make sure the required files exist before touching the microphone.
            if (!File.Exists(whisperExe))
            {
                MessageBox.Show("未找到 whisper-cli.exe");
                return;
            }

            if (!File.Exists(modelPath))
            {
                MessageBox.Show("未找到 Whisper 模型文件");
                return;
            }

            Console.WriteLine("开始");

            StartRecording();
        }


        /// <summary>
        /// Opens the microphone, starts writing to the temporary WAV file and launches the
        /// silence-detection loop. Does nothing if a recording or recognition is already running.
        /// </summary>
        private void StartRecording()
        {
            // A second recorder would fight over the same temp file.
            if (isRecording || isRecognizing)
            {
                return;
            }

            try
            {
                waveSource = new WaveInEvent
                {
                    WaveFormat = new WaveFormat(16000, 1),
                    BufferMilliseconds = 100
                };
                waveSource.DataAvailable += WaveSource_DataAvailable;
                waveSource.RecordingStopped += WaveSource_RecordingStopped;

                waveWriter = new WaveFileWriter(tempWavPath, waveSource.WaveFormat);

                recordingStopped = new TaskCompletionSource<bool>(
                    TaskCreationOptions.RunContinuationsAsynchronously);

                ResetSilenceTimer();
                isRecording = true;

                waveSource.StartRecording();
            }
            catch (Exception ex)
            {
                // No microphone, device busy, temp file locked, ...
                isRecording = false;
                ReleaseRecorder();
                MessageBox.Show("录音启动失败：" + ex.Message);
                return;
            }

            // Run the silence detection in the background; it ends when recording stops.
            Task.Run(CheckSilenceLoop);
        }


        /// <summary>
        /// Audio callback: appends the buffer to the WAV file and restarts the silence
        /// timer whenever the buffer's peak amplitude exceeds the threshold.
        /// </summary>
        private void WaveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            WaveFileWriter writer = waveWriter;
            if (!isRecording || writer == null)
            {
                return;
            }

            writer.Write(e.Buffer, 0, e.BytesRecorded);
            writer.Flush();

            // ===== Peak detection on 16-bit little-endian PCM =====
            float max = 0f;
            // "i + 1 <" keeps the high-byte read inside the recorded range.
            for (int i = 0; i + 1 < e.BytesRecorded; i += 2)
            {
                short sample = (short)((e.Buffer[i + 1] << 8) | e.Buffer[i]);
                float magnitude = Math.Abs(sample / 32768f);
                if (magnitude > max)
                {
                    max = magnitude;
                }
            }

            currentVolume = max;

            Console.WriteLine($"当前音量 = {currentVolume}；阈值 = {silenceThreshold}");
            if (currentVolume > silenceThreshold)
            {
                // Sound detected: restart the silence timer.
                Console.WriteLine("有声音，重置计时");
                ResetSilenceTimer();
            }
            else
            {
                Console.WriteLine("无声音");
            }
        }


        /// <summary>
        /// Polls every 100 ms while recording and triggers stop + recognition once the
        /// input has been silent for longer than <see cref="silenceDuration"/>.
        /// </summary>
        /// <returns>A task that completes when the loop exits.</returns>
        private async Task CheckSilenceLoop()
        {
            while (isRecording)
            {
                await Task.Delay(100);

                // Recording may have been stopped manually while we were waiting.
                if (!isRecording)
                {
                    break;
                }

                if (GetSilenceElapsedMs() > silenceDuration)
                {
                    Console.WriteLine("检测到持续1.5秒静音，自动停止");

                    // StopAndRecognize touches UI state, so start it on the UI thread.
                    RunOnUi(async () =>
                    {
                        await StopAndRecognize();
                    });

                    break;
                }
            }
        }

        /// <summary>
        /// Stops the recording (if one is running), waits until the WAV file has been
        /// closed, and then runs the recognition. Shared by the manual and automatic stop paths.
        /// </summary>
        /// <returns>A task that completes when recognition has finished or failed.</returns>
        private async Task StopAndRecognize()
        {
            if (!isRecording) return;

            isRecording = false;
            isRecognizing = true;

            try
            {
                await WaitForRecorderShutdown();

                await RunWhisperRecognition(tempWavPath);

                Console.WriteLine("识别完成");
            }
            catch (Exception ex)
            {
                MessageBox.Show("停止失败：" + ex.Message);
            }
            finally
            {
                isRecognizing = false;
            }
        }

        /// <summary>
        /// Asks the recorder to stop and waits for the RecordingStopped handler to close the
        /// WAV file, instead of guessing with a fixed delay. Rethrows a capture error if the
        /// recording ended with one.
        /// </summary>
        private async Task WaitForRecorderShutdown()
        {
            Task stopped = recordingStopped?.Task;

            waveSource?.StopRecording();

            if (stopped == null)
            {
                return;
            }

            Task first = await Task.WhenAny(stopped, Task.Delay(StopTimeoutMs));
            if (first == stopped)
            {
                // Propagates the exception reported by the recorder, if any.
                await stopped;
            }
            else
            {
                // The stop notification never arrived; release the file ourselves so the
                // recognition (and the next recording) can still open it.
                ReleaseRecorder();
            }
        }

        /// <summary>
        /// Raised by NAudio when capture has ended: releases the device and the WAV writer,
        /// then signals anyone waiting in <see cref="WaitForRecorderShutdown"/>.
        /// </summary>
        private void WaveSource_RecordingStopped(object sender, StoppedEventArgs e)
        {
            ReleaseRecorder();

            TaskCompletionSource<bool> completion = recordingStopped;
            if (completion == null)
            {
                return;
            }

            if (e != null && e.Exception != null)
            {
                completion.TrySetException(e.Exception);
            }
            else
            {
                completion.TrySetResult(true);
            }
        }

        /// <summary>
        /// Stop button: stops the recording manually and runs the recognition.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private async void VoiceEnd_Click(object sender, EventArgs e)
        {
            await StopAndRecognize();
        }

        /// <summary>
        /// Runs whisper-cli on the given WAV file and shows the recognized text.
        /// Output lines are accumulated so that multi-segment results are not truncated
        /// to the last line. Failures are reported with a message box.
        /// </summary>
        /// <param name="tempWavPath">Path of the WAV file to transcribe.</param>
        private async Task RunWhisperRecognition(String tempWavPath)
        {
            Console.WriteLine("开始识别");
            try
            {
                // WaitForExit blocks, so keep it off the UI thread.
                await Task.Run(() =>
                {
                    ProcessStartInfo psi = new ProcessStartInfo
                    {
                        FileName = whisperExe,
                        Arguments = $"--model \"{modelPath}\" --file \"{tempWavPath}\"  --language zh",
                        RedirectStandardOutput = true,
                        RedirectStandardError = true,
                        UseShellExecute = false,
                        CreateNoWindow = true,
                        StandardOutputEncoding = Encoding.UTF8,
                        StandardErrorEncoding = Encoding.UTF8
                    };

                    // Only touched from the process's output callback.
                    StringBuilder transcript = new StringBuilder();

                    using (Process process = new Process { StartInfo = psi })
                    {
                        whisperProcess = process;
                        try
                        {
                            process.OutputDataReceived += (s, e) =>
                            {
                                if (string.IsNullOrEmpty(e.Data))
                                {
                                    return;
                                }

                                Console.WriteLine(e.Data + "\r\n");

                                if (transcript.Length > 0)
                                {
                                    transcript.Append(Environment.NewLine);
                                }
                                transcript.Append(e.Data);

                                string textSoFar = transcript.ToString();
                                RunOnUi(() =>
                                {
                                    // Show everything recognized so far.
                                    rtLabelBlack1.Text = textSoFar;
                                });
                            };

                            // whisper-cli logs progress to stderr; it is drained (so the pipe
                            // cannot fill up) but deliberately not treated as an error.
                            process.ErrorDataReceived += (s, e) =>
                            {
                            };

                            process.Start();
                            process.BeginOutputReadLine();
                            process.BeginErrorReadLine();

                            process.WaitForExit();

                            if (process.ExitCode != 0)
                            {
                                throw new InvalidOperationException(
                                    "whisper-cli 退出码：" + process.ExitCode);
                            }
                        }
                        finally
                        {
                            whisperProcess = null;
                        }
                    }

                    Console.WriteLine("\r\n识别结束。\r\n");
                });

            }
            catch (Exception ex)
            {
                MessageBox.Show("识别失败：" + ex.Message);
            }
        }

        /// <summary>
        /// Detaches the event handlers and disposes the capture device and the WAV writer.
        /// Safe to call when they have already been released.
        /// </summary>
        private void ReleaseRecorder()
        {
            WaveInEvent source = waveSource;
            waveSource = null;
            if (source != null)
            {
                source.DataAvailable -= WaveSource_DataAvailable;
                source.RecordingStopped -= WaveSource_RecordingStopped;
                source.Dispose();
            }

            WaveFileWriter writer = waveWriter;
            waveWriter = null;
            writer?.Dispose();
        }

        /// <summary>
        /// Restarts the silence timer.
        /// </summary>
        private void ResetSilenceTimer()
        {
            lock (silenceLock)
            {
                silenceWatch.Restart();
            }
        }

        /// <summary>
        /// Returns the number of milliseconds since sound was last detected.
        /// </summary>
        private long GetSilenceElapsedMs()
        {
            lock (silenceLock)
            {
                return silenceWatch.ElapsedMilliseconds;
            }
        }

        /// <summary>
        /// Runs <paramref name="action"/> synchronously on the UI thread, silently skipping
        /// it when the form has already been closed.
        /// </summary>
        /// <param name="action">The work to run on the UI thread.</param>
        private void RunOnUi(Action action)
        {
            if (IsDisposed || !IsHandleCreated)
            {
                return;
            }

            try
            {
                Invoke(action);
            }
            catch (ObjectDisposedException)
            {
                // The form was closed between the check and the call; nothing left to update.
            }
            catch (InvalidOperationException)
            {
                // The window handle was destroyed while the call was being dispatched.
            }
        }

    }

五、代码片段解析

初始化麦克风采集参数
· 16000是采样率（Hz）。Whisper 模型本身就是按 16 kHz 的音频处理的，低于16000（16K）的采样率有可能会影响语音识别效果，所以建议直接设置为16K：既满足识别要求，数据量又小，性能更好；
· 1是指声道数；
· BufferMilliseconds = 100是指每100ms触发一次，也就是给你一段音频，100ms的响应性能相对优秀。
```
// Configure the microphone capture device.
waveSource = new WaveInEvent
{
    WaveFormat = new WaveFormat(16000, 1), // 16000 Hz sample rate, 1 channel (mono)
    BufferMilliseconds = 100               // deliver captured audio in buffers of about 100 ms
};
```
从PCM音频数据中找出当前这段音频的最大音量（峰值）
· i += 2的原因是音频格式是 16-bit PCM，每个采样点 = 2 字节；
· short sample = (short)((e.Buffer[i + 1] << 8) | e.Buffer[i]);小端序（Little Endian）解析，内存是低字节在前，高字节在后，拼接过程为sample = 高字节 << 8 | 低字节，等价于short sample = BitConverter.ToInt16(e.Buffer, i);
· float sample32 = sample / 32768f;是归一化，将[-32768, 32767]转换为[-1, 1)（归一化后PCM采样值的范围，-32768 对应 -1.0，32767 略小于 1.0）。
```
// Find the peak absolute amplitude of the current audio buffer.
float max = 0;
 // Each 16-bit PCM sample occupies 2 bytes, so step through the buffer two bytes at a time.
 for (int i = 0; i < e.BytesRecorded; i += 2)
 {
     // Little-endian: low byte first, high byte second.
     short sample = (short)((e.Buffer[i + 1] << 8) | e.Buffer[i]);
     // Normalize the 16-bit sample by dividing by 32768.
     float sample32 = sample / 32768f;
     if (Math.Abs(sample32) > max)
         max = Math.Abs(sample32);
 }
```
感谢看完全文，比心！