说明:
1,百度语音极速版,需要事先保存声音文件,然后调用百度接口。

2,关于文件的上传,可以用JSON格式上传文件的base64编码,也可以用raw格式直接上传文件。下面的代码使用的是JSON格式。

3,使用接口前需要向百度AI开放平台注册APPID,得到密钥,然后用得到的APPID和密钥来获得TOKEN。调用百度语音接口时需要TOKEN。

4,百度提供了一个工具,可以从拼音角度判断相似度,score越小说明拼音越相似,挺好用,非常适合需要匹配的场景。

5,相关Maven库:

<dependency>
    <groupId>com.github.stuxuhai</groupId>
    <artifactId>jpinyin</artifactId>
    <version>1.1.8</version>
</dependency>

上代码:

package test;

import com.alibaba.fastjson.JSONObject;
import sun.misc.BASE64Encoder;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.List;

/**
 * Helpers for calling the Baidu speech recognition endpoint and for matching
 * the recognized text against a list of candidate words by pinyin similarity.
 */
public class VopUtil {

    /** Endpoint that recognition requests are POSTed to. */
    public final static String URL = "https://vop.baidu.com/pro_api";
    /** Access token sent with every request; it has to be obtained beforehand. */
    public final static String TOKEN = "";
    /**
     * Unique user identifier used to tell users apart and to compute UV.
     * A machine MAC address or IMEI is recommended; at most 60 characters.
     */
    public final static String CUID = "zheshiyigeceshi";

    /**
     * Demo entry point: reads a recorded audio file, sends it to the recognition
     * endpoint, prints the response and then prints the candidate word whose
     * pinyin is closest to the recognized text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Recorded audio file; the spoken sentence is the same as the file name.
        String filePath = "D:\\语音test\\我要买美汁源果粒橙.wav";

        String base64 = getBaset64(filePath);
        if (base64.isEmpty()) {
            // getBaset64 reports failures as an empty string; there is nothing to send.
            System.out.println("语音文件为空或读取失败:" + filePath);
            return;
        }

        long fileLength = getFileLength(filePath);
        JSONObject vopResult = getVopStr(base64, fileLength);
        if (vopResult == null) {
            System.out.println("识别失败");
            return;
        }
        System.out.println("语音识别接口返回值:" + vopResult.toJSONString());

        // Stop here on failure: an unsuccessful response carries no usable "result".
        Object rawResult = vopResult.get("result");
        if (!"success.".equals(vopResult.getString("err_msg")) || rawResult == null) {
            System.out.println("识别失败");
            return;
        }

        // The result is rendered as a JSON array of strings; strip brackets and quotes.
        String resultStr = rawResult.toString()
                .replace("[", "")
                .replace("]", "")
                .replace("\"", "");
        System.out.println("语音识别的结果:" + resultStr);

        // Candidate names that the recognized text is matched against.
        String[] nameArr = {"可乐", "雪碧", "美年达", "美汁源", "红牛", "尖叫", "牛奶", "酸奶", "百事"};
        String targetName = getSimilarWord(resultStr, nameArr);
        System.out.println("拼音最相似:" + targetName);
    }

    /**
     * Sends one recognition request to {@link #URL} as a JSON POST body.
     *
     * <p>I/O failures are printed and not rethrown; whatever part of the response
     * was read is then handed to the JSON parser.
     *
     * @param file base64 encoding of the audio file
     * @param len  size of the original audio file in bytes
     * @return the parsed response body; NOTE(review): fastjson appears to return
     *         {@code null} when nothing could be read — callers should check
     */
    public static JSONObject getVopStr(String file, long len) {
        JSONObject params = new JSONObject(true);
        params.put("format", "wav");
        params.put("rate", 16000);
        params.put("channel", 1);
        params.put("cuid", CUID);
        params.put("token", TOKEN);
        params.put("dev_pid", 80001);
        params.put("len", len);
        params.put("speech", file);

        StringBuilder response = new StringBuilder();
        try {
            // "URL" is the java.net type after "new" and the String constant as argument.
            HttpURLConnection connection = (HttpURLConnection) new URL(URL).openConnection();
            connection.setDoOutput(true);
            connection.setDoInput(true);
            connection.setUseCaches(false);
            connection.setInstanceFollowRedirects(true);
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Accept", "application/json");
            connection.setRequestProperty("Content-Type", "application/json");
            connection.connect();

            // Encode explicitly as UTF-8 instead of relying on the platform charset.
            try (OutputStream out = connection.getOutputStream()) {
                out.write(params.toJSONString().getBytes(StandardCharsets.UTF_8));
                out.flush();
            }

            InputStream body = connection.getResponseCode() == 200
                    ? connection.getInputStream()
                    : connection.getErrorStream();
            // getErrorStream() may be null when the server sent no error body.
            if (body != null) {
                try (BufferedReader in = new BufferedReader(
                        new InputStreamReader(new BufferedInputStream(body), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        response.append(line);
                    }
                }
            }
        } catch (Exception e) {
            // Best effort, as before: report the problem and parse what was collected.
            e.printStackTrace();
        }
        return JSONObject.parseObject(response.toString());
    }

    /**
     * Finds the candidate whose pinyin is most similar to the input.
     *
     * @param input text to look up
     * @param arr   candidate words
     * @return the best-matching candidate, or {@code null} when there are no
     *         candidates or the lookup failed
     */
    public static String getSimilarWord(String input, String[] arr) {
        try {
            List<Search.Score> ranked = new Search(arr).search(input, 10);
            System.out.println("拼音相似度列表:" + ranked);
            if (ranked != null && !ranked.isEmpty()) {
                // The list is sorted by ascending score, so the first entry is the closest.
                return ranked.get(0).word.toString();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads a file and returns its content base64-encoded on a single line.
     *
     * @param filePath path of the file to encode
     * @return the base64 text, or an empty string when the file cannot be read
     */
    private static String getBaset64(String filePath) {
        try {
            // readAllBytes reads the complete file and closes it, unlike a single read() call.
            byte[] content = Files.readAllBytes(Paths.get(filePath));
            // The basic encoder emits no line separators, so no post-processing is needed.
            return Base64.getEncoder().encodeToString(content);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Returns the size of a file in bytes.
     *
     * @param filePath path of the file
     * @return the length reported by {@link File#length()}
     */
    private static long getFileLength(String filePath) {
        return new File(filePath).length();
    }

}

 

下面是Search类,这个类是百度提供的:

package test;

import com.github.stuxuhai.jpinyin.PinyinException;
import com.github.stuxuhai.jpinyin.PinyinFormat;
import com.github.stuxuhai.jpinyin.PinyinHelper;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Demonstrates correcting text by similarity: candidate words are ranked by the
 * edit distance between their pinyin and the pinyin of the input.
 * <p>
 * <pre>
 * String[] list = new String[]{"张三", "张衫", "张丹", "张成", "李四", "李奎"};
 * Search d = new Search(list);
 * System.out.println(d.search("张三", 10));
 * System.out.println(d.search("李四", 10));
 *
 * Output:
 * [{word=张三, score=0}, {word=张衫, score=1}, {word=张丹, score=1}, {word=张成, score=5}, {word=李四, score=9}, {word=李奎, score=10}]
 * [{word=李四, score=0}, {word=李奎, score=3}, {word=张三, score=9}, {word=张衫, score=10}, {word=张丹, score=10}, {word=张成, score=12}]
 * </pre>
 */
public class Search {
    /** Candidate words, converted to pinyin once at construction time. */
    final List<Word> targets = new ArrayList<>();

    /**
     * Builds the candidate list.
     *
     * @param list candidate words
     * @throws PinyinException if a candidate cannot be converted to pinyin
     */
    public Search(String[] list) throws PinyinException {
        for (String candidate : list) {
            targets.add(new Word(candidate));
        }
    }

    /**
     * Ranks all candidates by pinyin distance to the input.
     *
     * @param input text to match
     * @param limit maximum number of entries to return
     * @return candidates with their scores in ascending score order (smaller means
     *         more similar), at most {@code limit} entries
     * @throws PinyinException if the input cannot be converted to pinyin
     */
    public List<Score> search(String input, int limit) throws PinyinException {
        Word query = new Word(input);
        return targets.stream()
                .map(target -> {
                    Score entry = new Score();
                    entry.word = target;
                    entry.score = target.compareTo(query);
                    return entry;
                })
                .sorted()
                .limit(limit)
                .collect(Collectors.toList());
    }

    /**
     * Computes the edit distance between two strings, where insertion, deletion
     * and substitution of one character each cost 1.
     *
     * @param s first string
     * @param t second string
     * @return the minimum number of single-character edits turning {@code s} into {@code t}
     */
    public static int getEditDistance(String s, String t) {
        int n = s.length();
        int m = t.length();
        // Against an empty string the distance is the other string's length.
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }

        // d[i][j] = distance between the first i chars of s and the first j chars of t.
        int[][] d = new int[n + 1][m + 1];
        for (int i = 0; i <= n; i++) {
            d[i][0] = i;
        }
        for (int j = 0; j <= m; j++) {
            d[0][j] = j;
        }

        for (int i = 1; i <= n; i++) {
            char sChar = s.charAt(i - 1);
            for (int j = 1; j <= m; j++) {
                int cost = (sChar == t.charAt(j - 1)) ? 0 : 1;
                int deletion = d[i - 1][j] + 1;
                int insertion = d[i][j - 1] + 1;
                int substitution = d[i - 1][j - 1] + cost;
                d[i][j] = Math.min(Math.min(deletion, insertion), substitution);
            }
        }
        return d[n][m];
    }

    /** A word together with its pinyin, with and without tone information. */
    static class Word implements Comparable<Word> {
        final String word;
        /** Comma-separated pinyin with tone numbers. */
        final String pinyin1;
        /** Comma-separated pinyin without tones. */
        final String pinyin2;

        Word(String word) throws PinyinException {
            this.word = word;
            this.pinyin1 = PinyinHelper.convertToPinyinString(word, ",", PinyinFormat.WITH_TONE_NUMBER);
            this.pinyin2 = PinyinHelper.convertToPinyinString(word, ",", PinyinFormat.WITHOUT_TONE);
        }

        @Override
        public String toString() {
            return word;
        }

        /**
         * Returns a distance rather than an ordering: the sum of the edit distances
         * of the toned and the toneless pinyin. The value is never negative and is
         * 0 when both pinyin forms are identical.
         */
        @Override
        public int compareTo(Word other) {
            int tonedDistance = getEditDistance(this.pinyin1, other.pinyin1);
            int plainDistance = getEditDistance(this.pinyin2, other.pinyin2);
            return tonedDistance + plainDistance;
        }
    }

    /** A candidate word and its distance to the query; ordered by ascending score. */
    static class Score implements Comparable<Score> {
        Word word;
        int score;

        @Override
        public int compareTo(Score other) {
            return Integer.compare(score, other.score);
        }

        @Override
        public String toString() {
            return "{" +
                    "word=" + word +
                    ", score=" + score +
                    '}';
        }
    }
}

 

本地执行main方法后的输出结果:

语音识别接口返回值:{"result":["我要买美汁源果粒橙。"],"err_msg":"success.","sn":"275665891801562141461","corpus_no":"6709346487565939628","err_no":0}
语音识别的结果:我要买美汁源果粒橙。
拼音相似度列表:[{word=美汁源, score=58}, {word=美年达, score=66}, {word=百事, score=73}, {word=红牛, score=74}, {word=尖叫, score=74}, {word=酸奶, score=75}, {word=可乐, score=76}, {word=牛奶, score=76}, {word=雪碧, score=77}]
拼音最相似:美汁源

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐