百度语音识别测试

使用百度语音识别 Java SDK 时，需要先生成 pcm、wav 等格式的语音文件，再上传该文件，接口会返回识别结果。

我在北国不背锅

544人浏览 · 2022-06-22 17:46:57

我在北国不背锅 · 2022-06-22 17:46:57 发布

使用百度语音识别 Java SDK 时，需要先生成 pcm、wav 等格式的语音文件，再上传该文件，接口会返回识别结果。

添加maven依赖：

<!-- Baidu AIP Java SDK: supplies the com.baidu.aip.* classes (e.g. AipSpeech)
     imported by the recognition sample in this article. -->
<dependency>
    <groupId>com.baidu.aip</groupId>
    <artifactId>java-sdk</artifactId>
    <version>4.16.8</version>
</dependency>
<!-- org.json: supplies org.json.JSONObject, the type the recognition sample
     receives from the asr call and pretty-prints. -->
<dependency>
    <groupId>org.json</groupId>
    <artifactId>json</artifactId>
    <version>20160810</version>
</dependency>
<!-- log4j 1.x: the recognition sample points the "aip.log4j.conf" system property
     at a log4j properties file.
     NOTE(review): log4j 1.2.17 is the end-of-life 1.x line with published CVEs;
     confirm whether the SDK really needs it before shipping this dependency. -->
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>

生成语音文件：

package com.zhbr.util;

import javax.sound.sampled.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;

/**
 * Records speech from an audio capture line and saves it as a WAV file.
 *
 * <p>Calling {@link #startRecognize()} acquires a {@link TargetDataLine} for the format
 * returned by {@code getAudioFormat()} (16 kHz, 16-bit, mono, signed, little-endian PCM)
 * and starts a background {@code CaptureThread}. The thread discards input until a
 * "loud" fragment arrives, then buffers everything until more than
 * {@code MAX_SILENT_FRAGMENTS} consecutive quiet fragments have been seen, and finally
 * writes the buffered audio to {@link #getFilePath()}.
 *
 * <p>A fragment counts as loud when the absolute value of its last byte exceeds
 * {@code SILENCE_THRESHOLD}. With 16-bit little-endian samples that byte is the
 * high-order byte of the fragment's final sample, so this is a coarse amplitude check.
 *
 * <p>Author: zhaoyanning. Created: 2022/6/22 14:30.
 */
public class EngineeCore {

    /** A fragment whose last byte has an absolute value above this is treated as sound. */
    private static final int SILENCE_THRESHOLD = 5;

    /** Recording stops once more than this many consecutive quiet fragments were captured. */
    private static final int MAX_SILENT_FRAGMENTS = 20;

    /** Number of bytes requested from the capture line per read. */
    private static final int FRAGMENT_SIZE = 1024;

    /** Destination of the generated WAV file. */
    private String filePath;

    /** Creates a recorder without a destination; call {@link #setFilePath(String)} before recording. */
    public EngineeCore() {
    }

    /**
     * Creates a recorder that writes to the given file.
     *
     * @param filePath path of the WAV file to generate
     */
    public EngineeCore(String filePath) {
        this.filePath = filePath;
    }

    /**
     * @return path of the WAV file to generate, or {@code null} if none has been set
     */
    public String getFilePath() {
        return filePath;
    }

    /**
     * @param filePath path of the WAV file to generate
     */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    /** Format used both for capturing and for the written file; set by {@link #startRecognize()}. */
    AudioFormat audioFormat;

    /** Capture line acquired by {@link #startRecognize()}; {@code null} until then. */
    TargetDataLine targetDataLine;

    /** Keeps the capture loop running; volatile because it is read and written from different threads. */
    volatile boolean flag = true;

    /** Ends the capture loop and releases the capture line if one was acquired. */
    private void stopRecognize() {
        flag = false;
        TargetDataLine line = targetDataLine;
        if (line != null) {
            line.stop();
            line.close();
        }
    }

    /**
     * Builds the capture format: 16 kHz, 16-bit, mono, signed, little-endian PCM.
     *
     * @return the audio format used for recording and for the output file
     */
    private AudioFormat getAudioFormat() {
        // Candidate values: 8000, 11025, 16000, 22050, 44100
        float sampleRate = 16000;
        // Candidate values: 8, 16
        int sampleSizeInBits = 16;
        // Candidate values: 1, 2
        int channels = 1;
        boolean signed = true;
        boolean bigEndian = false;
        return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
    }

    /**
     * Acquires a capture line and starts recording on a background thread.
     *
     * <p>The method returns immediately; the recording ends by itself once the silence
     * criterion is met. Failures to acquire the line are printed and not rethrown. If no
     * file path has been set, an error is printed and nothing is started.
     */
    public void startRecognize() {
        if (filePath == null) {
            // Without a destination the capture thread would die with a NullPointerException
            // after the device has already been acquired, so refuse up front.
            System.err.println("EngineeCore: no output file path set; recording not started");
            return;
        }
        try {
            audioFormat = getAudioFormat();
            DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, audioFormat);
            targetDataLine = (TargetDataLine) AudioSystem.getLine(dataLineInfo);
            flag = true;
            new CaptureThread().start();
        } catch (LineUnavailableException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /** Background worker that captures audio until silence is detected and writes the WAV file. */
    class CaptureThread extends Thread {
        @Override
        public void run() {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            try {
                File audioFile = new File(filePath);
                targetDataLine.open(audioFormat);
                targetDataLine.start();

                byte[] fragment = new byte[FRAGMENT_SIZE];
                // Number of consecutive quiet fragments seen since recording began.
                int downSum = 0;
                while (flag) {
                    int read = targetDataLine.read(fragment, 0, fragment.length);
                    if (read <= 0) {
                        // The line delivered nothing (stopped or closed); looping again would spin.
                        break;
                    }
                    boolean loud = Math.abs(fragment[read - 1]) > SILENCE_THRESHOLD;
                    // Start storing at the first loud fragment; once started, store everything.
                    if (loud || baos.size() > 0) {
                        // Store only the bytes actually delivered by this read.
                        baos.write(fragment, 0, read);
                        System.out.println("守卫：" + fragment[0] + ",末尾：" + fragment[read - 1] + ",lenght" + read);
                        if (loud) {
                            System.out.println("重置奇数");
                            downSum = 0;
                        } else {
                            downSum++;
                        }
                        // Enough consecutive quiet fragments: the speaker has stopped.
                        if (downSum > MAX_SILENT_FRAGMENTS) {
                            System.out.println("停止录入");
                            break;
                        }
                    }
                }

                byte[] audioData = baos.toByteArray();
                long frameCount = audioData.length / audioFormat.getFrameSize();
                try (AudioInputStream ais =
                             new AudioInputStream(new ByteArrayInputStream(audioData), audioFormat, frameCount)) {
                    System.out.println("开始生成语音文件");
                    AudioSystem.write(ais, AudioFileFormat.Type.WAVE, audioFile);
                }
            } catch (LineUnavailableException | IOException | RuntimeException e) {
                e.printStackTrace();
            } finally {
                // Release the capture device on every exit path, not only after a successful write.
                stopRecognize();
            }
        }
    }

    /**
     * Records one utterance to a WAV file.
     *
     * @param args optional: {@code args[0]} is the output path; a built-in default path is
     *             used when no argument is given
     * @throws Exception declared for compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        String outputPath = args.length > 0
                ? args[0]
                : "C:\\Users\\zyn\\Downloads\\public\\public\\test.wav";

        EngineeCore engineeCore = new EngineeCore();
        engineeCore.setFilePath(outputPath);
        engineeCore.startRecognize();
    }
}

调用百度 API 识别语音文件：

package com.zhbr.shibie;

import com.baidu.aip.speech.AipSpeech;
import org.json.JSONObject;

/**
 * Command-line sample that sends a recorded WAV file to Baidu speech recognition through
 * {@code AipSpeech} and prints the JSON response.
 *
 * <p>Author: zhaoyanning. Created: 2022/6/22 11:09.
 */

public class Sample_common_baidu {
    // Credentials passed to the AipSpeech constructor; replace the placeholders with real values.
    public static final String APP_ID = "xxxx";
    public static final String API_KEY = "xxxx";
    public static final String SECRET_KEY = "xxxx";

    /**
     * Builds a client, applies the timeouts and logging property, submits the audio file
     * and prints the response indented by two spaces.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String audioPath = "C:\\Users\\zyn\\Downloads\\public\\public\\test.wav";
        final String audioFormat = "wav";
        final int sampleRate = 16000;
        final int connectTimeoutMillis = 2000;
        final int socketTimeoutMillis = 60000;

        // Create the speech client from the credentials above.
        AipSpeech speechClient = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);

        // Optional: network timeouts, in milliseconds.
        speechClient.setConnectionTimeoutInMillis(connectTimeoutMillis);
        speechClient.setSocketTimeoutInMillis(socketTimeoutMillis);

        // Optional: proxy server; choose HTTP or socket, or leave both unset.
        //client.setHttpProxy("proxy_host", proxy_port);  // HTTP proxy
        //client.setSocketProxy("proxy_host", proxy_port);  // socket proxy

        // Optional: log4j configuration file; the default configuration is used if unset.
        // The same property can also be supplied as a JVM startup argument.
        System.setProperty("aip.log4j.conf", "path/to/your/log4j.properties");

        // Submit the file for recognition and print the result.
        JSONObject recognition = speechClient.asr(audioPath, audioFormat, sampleRate, null);
        String prettyJson = recognition.toString(2);
        System.out.println(prettyJson);
    }
}

点击阅读全文

CSDN学习社区

CSDN联合极客时间，共同打造面向开发者的精品内容学习社区，助力成长！

更多推荐

cover

Kaldi之父，IEEE Fellow，小米首席语音科学家Daniel Povey将出席2024全球机器学习技术大会并发表演讲！

CSDN学习社区

cover

探索神经网络在商品销售和图像识别中的应用

CSDN学习社区

cover

基于stm32F103的座面声控台灯

CSDN学习社区

所有评论(0)

我在北国不背锅

@weixin_44455388

已为社区贡献3条内容