C#讯飞实时语音识别和语音合成

最近由于需要做一个关于语音的c#项目，经老师推荐使用了讯飞的语音识别和合成的接口，但由于网上资源关于c#实时语音识别这块实在是太少了，经查阅网上资源和研究源代码，最终完成了一个还算满意的demo，供各位后来者参考和借鉴，希望后来者能少走点弯路。以下是界面图。MSC模块导入及添加枚举常量等：导入msc.dll，讯飞的语音识别和语音合成函数是封装起来的，需要我们import进去。除此之外，我们还需要创建需要用到的枚举常量和结构体等。

anpluto

2327人浏览 · 2020-11-21 22:54:14

anpluto · 2020-11-21 22:54:14 发布

最近由于需要做一个关于语音的c#项目，经老师推荐使用了讯飞的语音识别和合成的接口，但由于网上资源关于c#实时语音识别这块实在是太少了，经查阅网上资源和研究源代码，最终完成了一个还算满意的demo，供各位后来者参考和借鉴，希望后来者能少走点弯路。

以下是界面图：
demo界面

MSC模块导入及添加枚举常量等

导入msc.dll。讯飞的语音识别和语音合成函数都封装在这个库里，需要我们import进来。
MSCDLL类
除此之外，我们还需要创建需要用到的枚举常量和结构体等

语音识别模块

这部分模块我们需要用到的是麦克风音频（NAudio）和语音识别类。

首先，连续的语音识别是需要用到麦克风的，这时候就需要创建一个麦克风类，来实现声音的获取，及对声音的识别。

其中较为关键的函数是wis_back_DataAvailable和OnDataAvailable：前一个根据采集到的数据计算音量并触发音量事件，后一个缓存采集到的音频数据，并在连续若干次低音量之后调用语音识别。
wis_back_DataAvailable和OnDataAvailable具体实现。

/// <summary>
/// Derives a coarse volume level from the start of a recorded buffer, stores it in
/// <c>volume</c> and raises <c>DataArrived</c> with the new value.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Recorded audio buffer supplied by the capture device.</param>
private void wis_back_DataAvailable(object sender, WaveInEventArgs e)
{
	// BitConverter.ToInt64 throws when fewer than 8 bytes are available, so a short
	// (or missing) buffer is treated as silence instead of crashing the capture callback.
	long sh = 0;
	if (e.Buffer != null && e.BytesRecorded >= sizeof(long) && e.Buffer.Length >= sizeof(long))
	{
		sh = System.BitConverter.ToInt64(e.Buffer, 0);
	}

	// 2^50; the division is deliberately done in integer arithmetic, as in the original.
	const long width = 1L << 50;
	float svolume = Math.Abs(sh / width);

	// Clamp to the supported range: 50 (minimum) .. 1500 (maximum).
	svolume = Math.Max(50.0f, Math.Min(1500.0f, svolume));

	// Scale to roughly 3.33 .. 100.
	this.volume = svolume / 15.0f;

	// Raise the volume event only when something is subscribed; invoking a null
	// delegate would throw NullReferenceException.
	var handler = DataArrived;
	if (handler != null)
	{
		handler(this, new DataArrivedEventArgs(this.volume));
	}
}

/// <summary>
/// Buffers each recorded audio chunk and, after a run of low-volume callbacks, hands the
/// buffered audio to the recognizer and clears the buffer.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Recorded audio buffer supplied by the capture device.</param>
void OnDataAvailable(object sender, WaveInEventArgs e)
{
	// Only the first BytesRecorded bytes of e.Buffer were filled by this callback;
	// the rest of the array may hold stale data.
	int recorded = e.BytesRecorded;

	totalBufferLength += recorded;
	// Divide in floating point so fractions of a second are kept.
	// NOTE(review): 32000 is presumably bytes per second (16 kHz, 16-bit, mono) - confirm.
	secondsRecorded = (float)(totalBufferLength / 32000.0);

	// Copy the chunk because the capture buffer is not owned by this handler.
	// Any tail of data.data beyond 'recorded' is left as VoiceData initialized it.
	VoiceData data = new VoiceData();
	for (int i = 0; i < recorded; i++)
	{
		data.data[i] = e.Buffer[i];
	}
	VoiceBuffer.Add(data);

	// Count down while the volume stays below 4; any louder chunk resets the countdown.
	if (volume < 4)
	{
		Ends = Ends - 1;
	}
	else
	{
		Ends = 5;
	}

	if (Ends == 0)
	{
		// More than 5 chunks means the buffer holds more than just the quiet run.
		if (VoiceBuffer.Count > 5)
		{
			isr.RunIAT(VoiceBuffer);
		}
		VoiceBuffer.Clear();
		Ends = 5;
	}
}

其中VoiceData 是我创建的处理音频数据的类
VoiceData类

接下来是语音识别类，这是实现语音识别的关键类

其中有两个语音识别的函数，其区别是一个通过麦克风获取声音数据来识别，一个通过读取文件数据来识别。

/// <summary>
/// Recognizes speech from audio captured from the microphone: opens a recognition session,
/// writes every buffered chunk, signals the end of the audio, polls for the result text and
/// finally ends the session. A non-empty result is published through <c>DataArrived</c>.
/// </summary>
/// <param name="VoiceBuffer">Recorded audio chunks in capture order.</param>
public void RunIAT(List<VoiceData> VoiceBuffer)
        {
            // Nothing to recognize: do not open a session at all.
            if (VoiceBuffer == null || VoiceBuffer.Count == 0)
            {
                return;
            }

            string hints = "正常结束";
            EpStatus ep_stat = EpStatus.ISR_EP_LOOKING_FOR_SPEECH;      // endpoint detector state
            RecogStatus rec_stat = RecogStatus.ISR_REC_STATUS_SUCCESS;  // recognizer state reported by MSC
            int errcode = (int)ErrorCode.MSP_SUCCESS;

            // session_id is the handle of this recognition session.
            IntPtr session_id = MSCDLL.QISRSessionBegin(null, QISRsession_begin_params, ref errcode);
            if ((int)ErrorCode.MSP_SUCCESS != errcode)
            {
                Console.WriteLine("QISRSessionBegin failed!（语音识别开始阶段） error code:" + errcode);
                return;
            }

            // Convert the handle once; every MSC call below takes it in string form.
            string sessionHandle = MSCDLL.PtrToStr(session_id);
            StringBuilder rec_result = new StringBuilder();
            bool audioAccepted = false;

            try
            {
                for (int i = 0; i < VoiceBuffer.Count; i++)
                {
                    // The first chunk is flagged FIRST, all later ones CONTINUE.
                    AudioStatus aud_stat = (i == 0)
                        ? AudioStatus.ISR_AUDIO_SAMPLE_FIRST
                        : AudioStatus.ISR_AUDIO_SAMPLE_CONTINUE;

                    errcode = MSCDLL.QISRAudioWrite(sessionHandle, VoiceBuffer[i].data, (uint)VoiceBuffer[i].data.Length, aud_stat, ref ep_stat, ref rec_stat);
                    if ((int)ErrorCode.MSP_SUCCESS != errcode)
                    {
                        // Stop immediately: the session is ended in the finally block and
                        // must not be written to again.
                        Console.WriteLine("QISRAudioWrite failed!（音频写入过程） error code:" + errcode);
                        return;
                    }
                }

                // Write the final (empty) chunk to signal the end of the audio.
                errcode = MSCDLL.QISRAudioWrite(sessionHandle, null, 0, AudioStatus.ISR_AUDIO_SAMPLE_LAST, ref ep_stat, ref rec_stat);
                if ((int)ErrorCode.MSP_SUCCESS != errcode)
                {
                    Console.WriteLine("QISRAudioWrite failed!（写入最后一块音频） error code:" + errcode);
                    return;
                }
                audioAccepted = true;

                // NOTE(review): this poll has no iteration limit; RunIATFile caps its final
                // poll - consider doing the same here if the service can stall.
                while (RecogStatus.ISR_REC_STATUS_SPEECH_COMPLETE != rec_stat)
                {
                    IntPtr rslt = MSCDLL.QISRGetResult(sessionHandle, ref rec_stat, 0, ref errcode);
                    if ((int)ErrorCode.MSP_SUCCESS != errcode)
                    {
                        Console.WriteLine("QISRGetResult failed！（获得结果阶段） error code: " + errcode);
                        break;
                    }
                    if (IntPtr.Zero != rslt)
                    {
                        rec_result.Append(MSCDLL.PtrToStr(rslt));
                        if (rec_result.Length >= BUFFER_SIZE)
                        {
                            Console.WriteLine("no enough buffer for rec_result !\n");
                            break;
                        }
                    }
                    Thread.Sleep(100); // optional pause between polls
                }
            }
            finally
            {
                // Always release the session, on success and on every error path.
                // The "normal end" hint is only passed when all audio was accepted.
                MSCDLL.QISRSessionEnd(sessionHandle, audioAccepted ? hints : null);
            }

            // Publish the recognized text, if any, to subscribers.
            // If error code 10111 is returned, SpeechRecognition() can be called to perform MSPLogin.
            if (rec_result.Length != 0)
            {
                var handler = DataArrived;
                if (handler != null)
                {
                    handler(this, new DataArrivedEventArgs(rec_result.ToString()));
                }
            }
        }

		/// <summary>
        /// Recognizes speech from an audio file while playing it back. The file is streamed
        /// to the recognizer in chunks and partial results are published through
        /// <c>DataArrived</c> as they arrive. (File recognition is limited to one minute of audio.)
        /// </summary>
        /// <param name="audio_path">Path of the audio file to recognize.</param>
        /// <returns>
        /// The accumulated recognition text, or <c>null</c> when no file was given or a
        /// recognizer call failed.
        /// </returns>
        private StringBuilder RunIATFile(string audio_path)
        {
            if (string.IsNullOrEmpty(audio_path))
            {
                Console.WriteLine("还没有选择语音文件");
                return null;
            }

            // Play the file back while it is being recognized.
            player = new SoundPlayer(audio_path);
            player.Play();

            const int headerSize = 44;        // the code skips a 44-byte WAV header
            const int buff_num = 1024 * 20;   // bytes sent per write

            string hints = "正常结束";
            EpStatus ep_stat = EpStatus.ISR_EP_LOOKING_FOR_SPEECH;       // endpoint detector state
            RecogStatus rec_stat = RecogStatus.ISR_REC_STATUS_SUCCESS;   // state reported by QISRAudioWrite
            RecogStatus rec_rslt = RecogStatus.ISR_REC_STATUS_SUCCESS;   // state reported by QISRGetResult
            int errcode = (int)ErrorCode.MSP_SUCCESS;

            IntPtr session_id = MSCDLL.QISRSessionBegin(null, QISRsession_begin_params, ref errcode);
            if (errcode != (int)ErrorCode.MSP_SUCCESS)
            {
                Console.WriteLine("开始一次语音识别失败！");
                MSCDLL.QISRSessionEnd(MSCDLL.PtrToStr(session_id), hints);
                return null;
            }

            // Convert the handle once; every MSC call below takes it in string form.
            string sessionHandle = MSCDLL.PtrToStr(session_id);
            StringBuilder result = new StringBuilder(); // final recognition text
            byte[] buff = new byte[buff_num];
            IntPtr bp = Marshal.AllocHGlobal(buff_num);

            try
            {
                // Read while recognizing, so large files are not loaded into memory at once.
                // Open read-only so read-only files and files opened by other readers work.
                using (FileStream fp = File.OpenRead(audio_path))
                {
                    fp.Position = headerSize;

                    bool firstChunk = true;
                    int len;
                    // Looping on the byte count returned by Read (instead of comparing
                    // Position with Length) also terminates for files shorter than the header.
                    while (!stop_audio && (len = fp.Read(buff, 0, buff.Length)) > 0)
                    {
                        // Copy only the bytes that were actually read.
                        Marshal.Copy(buff, 0, bp, len);

                        // The first chunk is flagged FIRST, as RunIAT does.
                        AudioStatus aud_stat = firstChunk
                            ? AudioStatus.ISR_AUDIO_SAMPLE_FIRST
                            : AudioStatus.ISR_AUDIO_SAMPLE_CONTINUE;
                        firstChunk = false;

                        errcode = MSCDLL.QISRAudioWrite(sessionHandle, bp, (uint)len, aud_stat, ref ep_stat, ref rec_stat);
                        if (errcode != (int)ErrorCode.MSP_SUCCESS)
                        {
                            Console.WriteLine("写入识别的音频失败！" + errcode);
                            return null;
                        }

                        // A partial result is available: fetch and publish it.
                        if (rec_stat == RecogStatus.ISR_REC_STATUS_SUCCESS)
                        {
                            IntPtr p = MSCDLL.QISRGetResult(sessionHandle, ref rec_rslt, 0, ref errcode);
                            if (errcode != (int)ErrorCode.MSP_SUCCESS)
                            {
                                Console.WriteLine("获取识别结果失败！" + errcode);
                                return null;
                            }
                            if (p != IntPtr.Zero)
                            {
                                string temp = MSCDLL.PtrToStr(p);
                                var handler = DataArrived;
                                if (handler != null)
                                {
                                    handler(this, new DataArrivedEventArgs(temp));
                                }
                                result.Append(temp);
                                Console.WriteLine("部分结果：" + temp);
                            }
                        }
                        Thread.Sleep(200);
                    }
                }

                // Signal the end of the audio with an empty final chunk, as RunIAT does.
                errcode = MSCDLL.QISRAudioWrite(sessionHandle, IntPtr.Zero, 0, AudioStatus.ISR_AUDIO_SAMPLE_LAST, ref ep_stat, ref rec_stat);
                if (errcode != (int)ErrorCode.MSP_SUCCESS)
                {
                    Console.WriteLine("写入音频失败！" + errcode);
                    return null;
                }

                // Poll for the remaining text until recognition completes or the retry
                // budget is used up.
                int loop_count = 0;
                do
                {
                    IntPtr p = MSCDLL.QISRGetResult(sessionHandle, ref rec_rslt, 0, ref errcode);
                    if (errcode != (int)ErrorCode.MSP_SUCCESS)
                    {
                        Console.WriteLine("获取识别结果失败！" + errcode);
                        return null;
                    }
                    if (p != IntPtr.Zero)
                    {
                        string temp = MSCDLL.PtrToStr(p);
                        var handler = DataArrived;
                        if (handler != null)
                        {
                            handler(this, new DataArrivedEventArgs(temp));
                        }
                        result.Append(temp);
                        Console.WriteLine("最后一块音频：" + temp);
                    }
                    Thread.Sleep(500);
                } while (rec_rslt != RecogStatus.ISR_REC_STATUS_SPEECH_COMPLETE && loop_count++ < 30);
            }
            finally
            {
                // Release the unmanaged buffer and the session on every path,
                // including the early error returns above.
                Marshal.FreeHGlobal(bp);
                MSCDLL.QISRSessionEnd(sessionHandle, hints);
            }

            Console.WriteLine("语音听写结束");
            Console.WriteLine("语音识别结果：");
            Console.WriteLine(result);

            return result;
        }

这里使用了两种识别方法，通过麦克风数据使用的识别是一次性读取数据后识别，而通过文件数据使用的是边读取数据边识别。由于说话的时间一般比较短，所以通过麦克风的设置为一次性读取（此处可以改进），而文件有时候过大，识别的时间过长，因此分段读取识别更好。注意，讯飞语音识别暂时只支持一分钟内的音频识别！

如果需要延长识别间隔（即说话停顿多久后才开始识别），可以增大麦克风类中 Ends 的重置值（示例代码中为 5）。

麦克风识别效果