ASR stands for Automatic Speech Recognition: automatically turning what a person says into text. Starting from a simple ASR demo, this article analyzes the speech-recognition source code in the Android SDK.
The core Android SDK source files involved are:

android.speech.SpeechRecognizer
 
android.speech.RecognitionListener
 
android.speech.RecognitionService

Prerequisite: the app must hold the android.permission.RECORD_AUDIO permission.
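A minimal sketch of that permission handling, assuming the code lives in the same Activity as the demo and that the AndroidX compat classes are available (ensureRecordAudioPermission and REQUEST_RECORD_AUDIO are names introduced here purely for illustration):

    // The manifest must also declare: <uses-permission android:name="android.permission.RECORD_AUDIO" />
    private static final int REQUEST_RECORD_AUDIO = 1;

    private void ensureRecordAudioPermission() {
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            // On Android 6.0+ the permission also has to be requested at runtime.
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
        } else {
            testRecognition();
        }
    }

With the permission granted, the demo is as follows: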

 private void testRecognition() {
        // Check whether at least one RecognitionService engine exists on the device.
        boolean recognitionAvailable = SpeechRecognizer.isRecognitionAvailable(this);
        Log.e(TAG, "recognition==" + recognitionAvailable);
        if (!recognitionAvailable) {
            return;
        }
        // Enumerate all installed recognition engines and pick the first one.
        List<ResolveInfo> list = getPackageManager().queryIntentServices(new Intent(
                RecognitionService.SERVICE_INTERFACE), PackageManager.MATCH_ALL);
        Log.e(TAG, "RecognitionService size=" + list.size());
        ResolveInfo info = list.get(0);
        Log.e(TAG, "packageName=" + info.serviceInfo.packageName + " /name=" + info.serviceInfo.name + "//info=" + info.toString());
        ComponentName currentRecognitionCmp = new ComponentName(info.serviceInfo.packageName, info.serviceInfo.name);
        // Bind the recognizer to the chosen engine and register the callbacks before starting.
        speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this, currentRecognitionCmp);
        speechRecognizer.setRecognitionListener(new SampleRecognitionListener());
        Intent mRecognitionIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        mRecognitionIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        speechRecognizer.startListening(mRecognitionIntent);
    }
 
  class SampleRecognitionListener implements RecognitionListener {
 
        @Override
        public void onReadyForSpeech(Bundle bundle) {
            Log.d(TAG, "onReadyForSpeech Start");
        }
 
        @Override
        public void onBeginningOfSpeech() {
            Log.d(TAG, "onBeginningOfSpeech Start");
        }
 
        @Override
        public void onRmsChanged(float v) {
            Log.d(TAG, "onRmsChanged Start");
        }
 
        @Override
        public void onBufferReceived(byte[] bytes) {
            Log.d(TAG, "onBufferReceived Start");
        }
 
        @Override
        public void onEndOfSpeech() {
            Log.d(TAG, "onEndOfSpeech Start");
        }
 
        @Override
        public void onError(int error) {
            Log.d(TAG, "onError Start");
            switch (error) {
                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                    resetText("Network connection timed out");
                    break;
                case SpeechRecognizer.ERROR_NETWORK:
                    resetText("Network error");
                    break;
                case SpeechRecognizer.ERROR_AUDIO:
                    resetText("Audio recording error");
                    break;
                case SpeechRecognizer.ERROR_CLIENT:
                    resetText("Client-side error");
                    break;
                case SpeechRecognizer.ERROR_SERVER:
                    resetText("Server error");
                    break;
                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                    resetText("No speech was heard");
                    break;
                case SpeechRecognizer.ERROR_NO_MATCH:
                    resetText("No matching recognition result");
                    break;
                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                    resetText("RecognitionService is already running, please try again later");
                    break;
                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                    resetText("Grant the app the RECORD_AUDIO permission (on Android 6.0+ it must also be requested at runtime)");
                    break;
                default:
                    break;
            }
            Log.d(TAG, "onError End");
        }
 
        @Override
        public void onResults(Bundle results) {
            Log.d(TAG, "onResults Start");
            // The recognized candidates are returned as an ArrayList<String> under RESULTS_RECOGNITION.
            ArrayList<String> mResult = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            if (mResult != null && !mResult.isEmpty()) {
                Log.d(TAG, "Recognize size=" + mResult.size());
                Log.e(TAG, mResult.get(0));
            }
            Log.d(TAG, "onResults End");
        }
 
        @Override
        public void onPartialResults(Bundle bundle) {
            Log.d(TAG, "onPartialResults Start");
        }
 
        @Override
        public void onEvent(int i, Bundle bundle) {
            Log.d(TAG, "onEvent Start");
        }
    }
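One thing the demo omits is cleanup. A minimal sketch, assuming the recognizer lives in an Activity, releases it when the Activity goes away:

    @Override
    protected void onDestroy() {
        super.onDestroy();
        if (speechRecognizer != null) {
            // cancel() aborts any in-flight session; destroy() unbinds from the engine service.
            speechRecognizer.cancel();
            speechRecognizer.destroy();
            speechRecognizer = null;
        }
    }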

The output is as follows:

2020-04-09 14:13:30.716 4638-4638/com.example.test E/speech: recognition==true
2020-04-09 14:13:30.717 4638-4638/com.example.test E/speech: RecognitionService size=1
2020-04-09 14:13:30.717 4638-4638/com.example.test E/speech: packageName=com.iflytek.speechsuite /name=com.iflytek.iatservice.SpeechService//info=ResolveInfo{aa56b7c com.iflytek.speechsuite/com.iflytek.iatservice.SpeechService m=0x108000}
2020-04-09 14:13:30.784 4638-4638/com.example.test D/speech: onReadyForSpeech Start
2020-04-09 14:13:30.784 4638-4638/com.example.test D/speech: onBeginningOfSpeech Start
2020-04-09 14:13:30.979 4638-4638/com.example.test D/speech: onRmsChanged Start
2020-04-09 14:13:31.015 4638-4638/com.example.test D/speech: onRmsChanged Start
                                                             .....
 
2020-04-09 14:13:33.898 4638-4638/com.example.test D/speech: onRmsChanged Start
2020-04-09 14:13:33.935 4638-4638/com.example.test D/speech: onRmsChanged Start
2020-04-09 14:13:33.941 4638-4638/com.example.test D/speech: onEndOfSpeech Start
2020-04-09 14:13:34.063 4638-4638/com.example.test D/speech: onResults Start
2020-04-09 14:13:34.064 4638-4638/com.example.test D/speech: Recognize size=1
2020-04-09 14:13:34.064 4638-4638/com.example.test E/speech: 哈喽你好。
2020-04-09 14:13:34.064 4638-4638/com.example.test D/speech: onResults End

Summary: there is only one engine on this device; the test device is a Honor phone, and its engine service is com.iflytek.iatservice.SpeechService. On other phones the engine service may differ.

For example (a sketch for reading the system's configured default engine follows this list):

Samsung: none
Honor: com.iflytek.iatservice.SpeechService
Redmi: com.xiaomi.mibrain.speech.asr.AsrService
AOSP reference device: com.google.android.voicesearch.serviceapi.GoogleRecognitionService
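Besides enumerating the engines with queryIntentServices(), the system's configured default recognizer can also be inspected. SpeechRecognizer itself reads the hidden setting Settings.Secure.VOICE_RECOGNITION_SERVICE; the sketch below reads the same setting by its literal key from within the demo Activity, which is an assumption for illustration rather than a supported public API:

    // "voice_recognition_service" is the key behind the hidden
    // Settings.Secure.VOICE_RECOGNITION_SERVICE constant that startListening() falls back to.
    String flat = Settings.Secure.getString(getContentResolver(), "voice_recognition_service");
    ComponentName defaultEngine = (flat == null) ? null : ComponentName.unflattenFromString(flat);
    Log.d(TAG, "default recognition engine=" + defaultEngine);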
Summary:
The ASR-related code in the Android SDK only defines the speech-recognition interfaces; the actual engine has to be provided by each vendor.
The core flow: SpeechRecognizer creates the recognition session, starts an engine service that extends RecognitionService, and the engine delivers the recognition callbacks back to SpeechRecognizer.
The recognition service extends RecognitionService and is declared with action="android.speech.RecognitionService"; on the engine side, recognition starts from the service implementation's onStartListening() method.
Source code analysis
The first step is to check whether a speech-recognition service exists: all engine services are queried, and recognition is usable only when at least one is found. On the Samsung device (Galaxy Tab S5e) there is no engine service (or none that implements the Android SDK interface).

/**
 * Checks whether a speech recognition service is available on the system. If this method
 * returns {@code false}, {@link SpeechRecognizer#createSpeechRecognizer(Context)} will
 * fail.
 * 
 * @param context with which {@code SpeechRecognizer} will be created
 * @return {@code true} if recognition is available, {@code false} otherwise
 */
public static boolean isRecognitionAvailable(final Context context) {
    final List<ResolveInfo> list = context.getPackageManager().queryIntentServices(
            new Intent(RecognitionService.SERVICE_INTERFACE), 0);
    return list != null && list.size() != 0;
}
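On a device with no RecognitionService at all (the Samsung case above), a common alternative is the RecognizerIntent activity route. A sketch, assuming some installed app exposes a voice-input activity and that the code lives in the demo Activity (REQUEST_SPEECH is an arbitrary request code introduced here):

    private static final int REQUEST_SPEECH = 100;

    private void startRecognitionActivity() {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        if (intent.resolveActivity(getPackageManager()) != null) {
            startActivityForResult(intent, REQUEST_SPEECH);
        }
    }

    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        super.onActivityResult(requestCode, resultCode, data);
        if (requestCode == REQUEST_SPEECH && resultCode == RESULT_OK && data != null) {
            // EXTRA_RESULTS holds the recognized candidates, best match first.
            ArrayList<String> texts = data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
            Log.d(TAG, "activity recognition result=" + texts);
        }
    }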

Next a SpeechRecognizer instance is created. Only the two factory methods below can be used, since the constructor is private. Because the demo targets a specific engine service, the ComponentName of that service is passed in; otherwise, if no default recognizer is configured, binding fails with "bind to recognition service failed".

 /**
     * Factory method to create a new {@code SpeechRecognizer}. Please note that
     * {@link #setRecognitionListener(RecognitionListener)} should be called before dispatching any
     * command to the created {@code SpeechRecognizer}, otherwise no notifications will be
     * received.
     *
     * @param context in which to create {@code SpeechRecognizer}
     * @return a new {@code SpeechRecognizer}
     */
    public static SpeechRecognizer createSpeechRecognizer(final Context context) {
        return createSpeechRecognizer(context, null);
    }
 
    /**
     * Factory method to create a new {@code SpeechRecognizer}. Please note that
     * {@link #setRecognitionListener(RecognitionListener)} should be called before dispatching any
     * command to the created {@code SpeechRecognizer}, otherwise no notifications will be
     * received.
     *
     * Use this version of the method to specify a specific service to direct this
     * {@link SpeechRecognizer} to. Normally you would not use this; use
     * {@link #createSpeechRecognizer(Context)} instead to use the system default recognition
     * service.
     * 
     * @param context in which to create {@code SpeechRecognizer}
     * @param serviceComponent the {@link ComponentName} of a specific service to direct this
     *        {@code SpeechRecognizer} to
     * @return a new {@code SpeechRecognizer}
     */
    public static SpeechRecognizer createSpeechRecognizer(final Context context,
            final ComponentName serviceComponent) {
        if (context == null) {
            throw new IllegalArgumentException("Context cannot be null)");
        }
        checkIsCalledFromMainThread();
        return new SpeechRecognizer(context, serviceComponent);
    }

The SpeechRecognizer constructor does nothing but field assignments:

    /**
     * The right way to create a {@code SpeechRecognizer} is by using
     * {@link #createSpeechRecognizer} static factory method
     */
    private SpeechRecognizer(final Context context, final ComponentName serviceComponent) {
        mContext = context;
        mServiceComponent = serviceComponent;
    }
Before invoking the recognition service, the recognition listener must be set via setRecognitionListener(). At this point mService, the proxy for the RecognitionService implementation, does not exist yet, so the message is queued in the mPendingTasks linked queue.

 /**
     * Sets the listener that will receive all the callbacks. The previous unfinished commands will
     * be executed with the old listener, while any following command will be executed with the new
     * listener.
     * 
     * @param listener listener that will receive all the callbacks from the created
     *        {@link SpeechRecognizer}, this must not be null.
     */
    public void setRecognitionListener(RecognitionListener listener) {
        checkIsCalledFromMainThread();
        putMessage(Message.obtain(mHandler, MSG_CHANGE_LISTENER, listener));
    }
  /** The actual RecognitionService endpoint */
    private IRecognitionService mService;
 
/**
 * Temporary queue, saving the messages until the connection will be established, afterwards,
 * only mHandler will receive the messages
 */
    private final Queue<Message> mPendingTasks = new LinkedList<Message>();
 
    
private void putMessage(Message msg) {
        if (mService == null) {
            mPendingTasks.offer(msg);
        } else {
            mHandler.sendMessage(msg);
        }
    }

If the service is already connected, the message goes straight to mHandler; handling MSG_CHANGE_LISTENER is just an assignment:

 /** Handler that will execute the main tasks */
    private Handler mHandler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MSG_START:
                    handleStartListening((Intent) msg.obj);
                    break;
                case MSG_STOP:
                    handleStopMessage();
                    break;
                case MSG_CANCEL:
                    handleCancelMessage();
                    break;
                case MSG_CHANGE_LISTENER:
                    handleChangeListener((RecognitionListener) msg.obj);
                    break;
            }
        }
    };
 
 
 
 /** changes the listener */
    private void handleChangeListener(RecognitionListener listener) {
        if (DBG) Log.d(TAG, "handleChangeListener, listener=" + listener);
        mListener.mInternalListener = listener;
    }

The RecognitionListener interface deserves special attention: it carries the callbacks for the whole recognition process, from the start of listening until the results come back, with the engine applying its own default recognition timeouts.

package android.speech;
 
/**
 * Used for receiving notifications from the SpeechRecognizer when the
 * recognition related events occur. All the callbacks are executed on the
 * Application main thread.
 */
public interface RecognitionListener {
    /**
     * Called when the endpointer is ready for the user to start speaking.
     * 
     * @param params parameters set by the recognition service. Reserved for future use.
     */
    void onReadyForSpeech(Bundle params);
 
    /**
     * The user has started to speak.
     */
    void onBeginningOfSpeech();
 
    /**
     * The sound level in the audio stream has changed. There is no guarantee that this method will
     * be called.
     * 
     * @param rmsdB the new RMS dB value
     */
    void onRmsChanged(float rmsdB);
 
    /**
     * More sound has been received. The purpose of this function is to allow giving feedback to the
     * user regarding the captured audio. There is no guarantee that this method will be called.
     * 
     * @param buffer a buffer containing a sequence of big-endian 16-bit integers representing a
     *        single channel audio stream. The sample rate is implementation dependent.
     */
    void onBufferReceived(byte[] buffer);
 
    /**
     * Called after the user stops speaking.
     */
    void onEndOfSpeech();
 
    /**
     * A network or recognition error occurred.
     * 
     * @param error code is defined in {@link SpeechRecognizer}
     */
    void onError(int error);
 
    /**
     * Called when recognition results are ready.
     * 
     * @param results the recognition results. To retrieve the results in {@code
     *        ArrayList<String>} format use {@link Bundle#getStringArrayList(String)} with
     *        {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter. A float array of
     *        confidence values might also be given in {@link SpeechRecognizer#CONFIDENCE_SCORES}.
     */
    void onResults(Bundle results);
 
    /**
     * Called when partial recognition results are available. The callback might be called at any
     * time between {@link #onBeginningOfSpeech()} and {@link #onResults(Bundle)} when partial
     * results are ready. This method may be called zero, one or multiple times for each call to
     * {@link SpeechRecognizer#startListening(Intent)}, depending on the speech recognition
     * service implementation.  To request partial results, use
     * {@link RecognizerIntent#EXTRA_PARTIAL_RESULTS}
     * 
     * @param partialResults the returned results. To retrieve the results in
     *        ArrayList&lt;String&gt; format use {@link Bundle#getStringArrayList(String)} with
     *        {@link SpeechRecognizer#RESULTS_RECOGNITION} as a parameter
     */
    void onPartialResults(Bundle partialResults);
 
    /**
     * Reserved for adding future events.
     * 
     * @param eventType the type of the occurred event
     * @param params a Bundle containing the passed parameters
     */
    void onEvent(int eventType, Bundle params);
}
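The documentation for onPartialResults() points at RecognizerIntent.EXTRA_PARTIAL_RESULTS. As a small sketch on top of the demo above (whether partial results actually arrive depends on the engine), the extra is added to the recognition intent and the bundle is read with the same RESULTS_RECOGNITION key:

    // In testRecognition(), when building mRecognitionIntent:
    mRecognitionIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);

    // In SampleRecognitionListener:
    @Override
    public void onPartialResults(Bundle partialResults) {
        ArrayList<String> partial =
                partialResults.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        Log.d(TAG, "partial=" + partial);
    }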

Next, the service action is set up and listening begins. On the first call, startListening() binds the recognition service, then posts MSG_START:

 /**
     * Starts listening for speech. Please note that
     * {@link #setRecognitionListener(RecognitionListener)} should be called beforehand, otherwise
     * no notifications will be received.
     *
     * @param recognizerIntent contains parameters for the recognition to be performed. The intent
     *        may also contain optional extras, see {@link RecognizerIntent}. If these values are
     *        not set explicitly, default values will be used by the recognizer.
     */
    public void startListening(final Intent recognizerIntent) {
        if (recognizerIntent == null) {
            throw new IllegalArgumentException("intent must not be null");
        }
        checkIsCalledFromMainThread();
        if (mConnection == null) { // first time connection
            mConnection = new Connection();
            
            Intent serviceIntent = new Intent(RecognitionService.SERVICE_INTERFACE);
            
            if (mServiceComponent == null) {
                String serviceComponent = Settings.Secure.getString(mContext.getContentResolver(),
                        Settings.Secure.VOICE_RECOGNITION_SERVICE);
                
                if (TextUtils.isEmpty(serviceComponent)) {
                    Log.e(TAG, "no selected voice recognition service");
                    mListener.onError(ERROR_CLIENT);
                    return;
                }
                
                serviceIntent.setComponent(ComponentName.unflattenFromString(serviceComponent));                
            } else {
                serviceIntent.setComponent(mServiceComponent);
            }
            
            if (!mContext.bindService(serviceIntent, mConnection, Context.BIND_AUTO_CREATE)) {
                Log.e(TAG, "bind to recognition service failed");
                mConnection = null;
                mService = null;
                mListener.onError(ERROR_CLIENT);
                return;
            }
        }
        putMessage(Message.obtain(mHandler, MSG_START, recognizerIntent));
    }

Now follow the flow once the service is bound. This is a cross-process service: in onServiceConnected(), mService becomes the remote IRecognitionService proxy, and the previously queued messages in mPendingTasks (including the MSG_CHANGE_LISTENER posted earlier) are drained into mHandler one by one.

 /**
     * Basic ServiceConnection that records the mService variable. Additionally, on creation it
     * invokes the {@link IRecognitionService#startListening(Intent, IRecognitionListener)}.
     */
    private class Connection implements ServiceConnection {
 
        public void onServiceConnected(final ComponentName name, final IBinder service) {
            // always done on the application main thread, so no need to send message to mHandler
            mService = IRecognitionService.Stub.asInterface(service);
            if (DBG) Log.d(TAG, "onServiceConnected - Success");
            while (!mPendingTasks.isEmpty()) {
                mHandler.sendMessage(mPendingTasks.poll());
            }
        }
 
        public void onServiceDisconnected(final ComponentName name) {
            // always done on the application main thread, so no need to send message to mHandler
            mService = null;
            mConnection = null;
            mPendingTasks.clear();
            if (DBG) Log.d(TAG, "onServiceDisconnected - Success");
        }
    }

After binding succeeds, the MSG_START message sent to mHandler ends up calling startListening() on the remote mService:

 /** Handler that will execute the main tasks */
    private Handler mHandler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MSG_START:
                    handleStartListening((Intent) msg.obj);
                    break;
                case MSG_STOP:
                    handleStopMessage();
                    break;
                case MSG_CANCEL:
                    handleCancelMessage();
                    break;
                case MSG_CHANGE_LISTENER:
                    handleChangeListener((RecognitionListener) msg.obj);
                    break;
            }
        }
    };
 
 
 
   /** sends the actual message to the service */
    private void handleStartListening(Intent recognizerIntent) {
        if (!checkOpenConnection()) {
            return;
        }
        try {
            mService.startListening(recognizerIntent, mListener);
            if (DBG) Log.d(TAG, "service start listening command succeded");
        } catch (final RemoteException e) {
            Log.e(TAG, "startListening() failed", e);
            mListener.onError(ERROR_CLIENT);
        }
    }

The Binder returned by the bound service is RecognitionServiceBinder. Its startListening() posts MSG_START_LISTENING to the service-side mHandler; the listener argument is the IRecognitionListener that wraps the RecognitionListener set earlier.

android.speech.RecognitionService

/** Binder of the recognition service */
    private static final class RecognitionServiceBinder extends IRecognitionService.Stub {
        private final WeakReference<RecognitionService> mServiceRef;
 
        public RecognitionServiceBinder(RecognitionService service) {
            mServiceRef = new WeakReference<RecognitionService>(service);
        }
 
        @Override
        public void startListening(Intent recognizerIntent, IRecognitionListener listener) {
            if (DBG) Log.d(TAG, "startListening called by:" + listener.asBinder());
            final RecognitionService service = mServiceRef.get();
            if (service != null && service.checkPermissions(listener)) {
                service.mHandler.sendMessage(Message.obtain(service.mHandler,
                        MSG_START_LISTENING, service.new StartListeningArgs(
                                recognizerIntent, listener, Binder.getCallingUid())));
            }
        }
}

Following the message, dispatchStartListening() is eventually invoked. If the client process dies, a death recipient schedules MSG_CANCEL; otherwise RecognitionService.this.onStartListening(intent, mCurrentCallback) is called, where mCurrentCallback wraps the listener registered earlier. onStartListening() is an abstract method that each engine must implement (together with onCancel() and onStopListening()); this is where the engine actually starts.

 private void dispatchStartListening(Intent intent, final IRecognitionListener listener,
            int callingUid) {
        if (mCurrentCallback == null) {
            if (DBG) Log.d(TAG, "created new mCurrentCallback, listener = " + listener.asBinder());
            try {
                listener.asBinder().linkToDeath(new IBinder.DeathRecipient() {
                    @Override
                    public void binderDied() {
                        mHandler.sendMessage(mHandler.obtainMessage(MSG_CANCEL, listener));
                    }
                }, 0);
            } catch (RemoteException re) {
                Log.e(TAG, "dead listener on startListening");
                return;
            }
            mCurrentCallback = new Callback(listener, callingUid);
            RecognitionService.this.onStartListening(intent, mCurrentCallback);
        } else {
            try {
                listener.onError(SpeechRecognizer.ERROR_RECOGNIZER_BUSY);
            } catch (RemoteException e) {
                Log.d(TAG, "onError call from startListening failed");
            }
            Log.i(TAG, "concurrent startListening received - ignoring this call");
        }
    }
    /**
     * Notifies the service that it should start listening for speech.
     * 
     * @param recognizerIntent contains parameters for the recognition to be performed. The intent
     *        may also contain optional extras, see {@link RecognizerIntent}. If these values are
     *        not set explicitly, default values should be used by the recognizer.
     * @param listener that will receive the service's callbacks
     */
    protected abstract void onStartListening(Intent recognizerIntent, Callback listener);

An empty engine service looks like the stub below: onStartListening() is where speech recognition starts, and the engine reports its progress back through the Callback. A more fleshed-out sketch follows the stub.

public class RecService extends RecognitionService {
 
    @Override
    protected void onStartListening(Intent recognizerIntent, Callback listener) {
 
    }
 
    @Override
    protected void onCancel(Callback listener) {
 
    }
 
    @Override
    protected void onStopListening(Callback listener) {
 
    }
}
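For completeness, a hedged sketch of what an engine implementation might do with the Callback. FakeEngineService and its canned result are assumptions made here for illustration, not taken from any real engine; a real implementation would record audio and run its recognizer between beginningOfSpeech() and results(). The service also needs a manifest entry with action android.speech.RecognitionService, shown as a comment below.

import android.content.Intent;
import android.os.Bundle;
import android.os.RemoteException;
import android.speech.RecognitionService;
import android.speech.SpeechRecognizer;
import android.util.Log;

import java.util.ArrayList;

// Assumed manifest declaration, so that SpeechRecognizer clients can find the engine:
// <service android:name=".FakeEngineService" android:exported="true">
//     <intent-filter>
//         <action android:name="android.speech.RecognitionService" />
//     </intent-filter>
// </service>
public class FakeEngineService extends RecognitionService {

    private static final String TAG = "FakeEngineService";

    @Override
    protected void onStartListening(Intent recognizerIntent, Callback listener) {
        try {
            // Drive the client's RecognitionListener through the Callback proxy.
            listener.readyForSpeech(new Bundle());
            listener.beginningOfSpeech();
            listener.endOfSpeech();

            // A real engine would capture audio and recognize it here; this fake
            // engine immediately returns a canned result.
            ArrayList<String> texts = new ArrayList<>();
            texts.add("hello");
            Bundle results = new Bundle();
            results.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, texts);
            listener.results(results);
        } catch (RemoteException e) {
            Log.e(TAG, "client died during recognition", e);
        }
    }

    @Override
    protected void onCancel(Callback listener) {
        // Stop recording and discard any pending result.
    }

    @Override
    protected void onStopListening(Callback listener) {
        // Stop recording and finish recognition with the audio captured so far.
    }
}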
 