RhinoManager.cs
//
// Copyright 2021 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
//
// Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//

using System;
using UnityEngine;

namespace Pv.Unity
{
    public class RhinoManager
    {
        private Rhino _rhino;
        private Action<Inference> _inferenceCallback;
        private Action<RhinoException> _processErrorCallback;

        /// <summary>
        /// Creates an instance of the Rhino inference engine with built-in audio processing.
        /// </summary>
        /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
        /// <param name="contextPath">Absolute path to the Rhino context file (.rhn).</param>
        /// <param name="inferenceCallback">A callback invoked when Rhino has made an intent inference.</param>
        /// <param name="modelPath">(Optional) Absolute path to the file containing model parameters. If not set, the default location is used.</param>
        /// <param name="sensitivity">
        /// (Optional) Inference sensitivity. A higher sensitivity value results in
        /// fewer misses at the cost of (potentially) increasing the erroneous inference rate.
        /// Sensitivity should be a floating-point number within [0, 1].
        /// </param>
        /// <param name="endpointDurationSec">
        /// (Optional) Endpoint duration in seconds. An endpoint is a chunk of silence at the end of an
        /// utterance that marks the end of a spoken command. It should be a positive number within [0.5, 5]. A lower endpoint
        /// duration reduces delay and improves responsiveness. A higher endpoint duration ensures Rhino doesn't return an inference
        /// preemptively if the user pauses before finishing the request.
        /// </param>
        /// <param name="requireEndpoint">
        /// (Optional) If set to `true`, Rhino requires an endpoint (a chunk of silence) after the spoken command.
        /// If set to `false`, Rhino tries to detect silence, but if it cannot, it still provides an inference regardless. Set
        /// to `false` only if operating in an environment with overlapping speech (e.g. people talking in the background).
        /// </param>
        /// <param name="processErrorCallback">(Optional) Reports errors encountered while the engine is processing audio.</param>
        public static RhinoManager Create(
            string accessKey,
            string contextPath,
            Action<Inference> inferenceCallback,
            string modelPath = null,
            float sensitivity = 0.5f,
            float endpointDurationSec = 1.0f,
            bool requireEndpoint = true,
            Action<RhinoException> processErrorCallback = null)
        {
            Rhino rhino = Rhino.Create(
                accessKey,
                contextPath,
                modelPath: modelPath,
                sensitivity: sensitivity,
                endpointDurationSec: endpointDurationSec,
                requireEndpoint: requireEndpoint);
            return new RhinoManager(rhino, inferenceCallback, processErrorCallback);
        }

        // Private constructor
        private RhinoManager(Rhino rhino, Action<Inference> inferenceCallback, Action<RhinoException> processErrorCallback = null)
        {
            _rhino = rhino;
            _inferenceCallback = inferenceCallback;
            _processErrorCallback = processErrorCallback;

            VoiceProcessor.Instance.AddFrameListener(OnFrameCaptured);
        }

        /// <summary>
        /// Callback that consumes audio frames as the voice processor produces them.
        /// </summary>
        /// <param name="frame">Frame of audio samples.</param>
        private void OnFrameCaptured(short[] frame)
        {
            try
            {
                bool isFinalized = _rhino.Process(frame);
                if (isFinalized)
                {
                    Inference inference = _rhino.GetInference();
                    if (_inferenceCallback != null)
                        _inferenceCallback.Invoke(inference);

                    VoiceProcessor.Instance.RemoveFrameListener(OnFrameCaptured);
                    if (VoiceProcessor.Instance.NumFrameListeners == 0)
                    {
                        VoiceProcessor.Instance.StopRecording();
                    }
                }
            }
            catch (RhinoException ex)
            {
                if (_processErrorCallback != null)
                    _processErrorCallback(ex);
                else
                    Debug.LogError(ex.ToString());
            }
        }

        /// <summary>
        /// Checks whether RhinoManager is currently capturing audio.
        /// </summary>
        /// <returns>Whether RhinoManager is capturing audio or not.</returns>
        public bool IsRecording => VoiceProcessor.Instance.IsRecording;

        /// <summary>
        /// Checks whether any audio capture devices are available.
        /// </summary>
        /// <returns>Whether any audio capture devices are available.</returns>
        public bool IsAudioDeviceAvailable()
        {
            VoiceProcessor.Instance.UpdateDevices();
            return VoiceProcessor.Instance.CurrentDeviceIndex >= 0;
        }

        /// <summary>
        /// Starts audio capture and intent inference.
        /// </summary>
        public void Process()
        {
            if (_rhino == null)
            {
                throw new RhinoInvalidStateException("Cannot start RhinoManager - resources have already been released");
            }

            VoiceProcessor.Instance.StartRecording(_rhino.FrameLength, _rhino.SampleRate);
        }

        /// <summary>
        /// Frees resources that were allocated to Rhino and the voice processor.
        /// </summary>
        public void Delete()
        {
            if (_rhino != null)
            {
                VoiceProcessor.Instance.RemoveFrameListener(OnFrameCaptured);
                if (VoiceProcessor.Instance.NumFrameListeners == 0)
                {
                    VoiceProcessor.Instance.StopRecording();
                }

                _rhino.Dispose();
                _rhino = null;
            }
        }
    }
}
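
// ---------------------------------------------------------------------------
// Usage sketch (not part of the original file): a minimal, hypothetical
// MonoBehaviour showing how RhinoManager.Create, Process, and Delete might be
// wired up. The class name `RhinoDemo`, the AccessKey placeholder, and the
// context path are illustrative only, and the Inference fields used below
// (IsUnderstood, Intent, Slots) are assumed to match the other Rhino bindings.
// ---------------------------------------------------------------------------
namespace Pv.Unity
{
    using UnityEngine;

    public class RhinoDemo : MonoBehaviour
    {
        private RhinoManager _rhinoManager;

        private void Start()
        {
            // Placeholder AccessKey and context path - replace with real values.
            _rhinoManager = RhinoManager.Create(
                "${YOUR_ACCESS_KEY}",
                "/path/to/context.rhn",
                OnInferenceResult,
                processErrorCallback: ex => Debug.LogError(ex.ToString()));

            // Begin audio capture; OnInferenceResult fires once Rhino finalizes an inference.
            _rhinoManager.Process();
        }

        private void OnInferenceResult(Inference inference)
        {
            if (inference.IsUnderstood)
            {
                Debug.Log("Intent: " + inference.Intent);
                foreach (var slot in inference.Slots)
                {
                    Debug.Log(slot.Key + ": " + slot.Value);
                }
            }
            else
            {
                Debug.Log("Didn't understand the command.");
            }
        }

        private void OnDestroy()
        {
            // Release native resources tied to this manager.
            if (_rhinoManager != null)
            {
                _rhinoManager.Delete();
            }
        }
    }
}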