// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
#include <string>
#include "base/basictypes.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_grammar.h"
#include "content/public/common/speech_recognition_result.h"
namespace content {
// Forward declarations: this header only refers to these types by const
// reference, so their full definitions are not needed here.
class AudioChunk;
struct SpeechRecognitionError;
// This interface models the basic contract that a speech recognition engine,
// either working locally or relying on a remote web-service, must obey.
// The expected call sequence for exported methods is:
//   StartRecognition    Mandatory at beginning of SR.
//   TakeAudioChunk      For every audio chunk pushed.
//   AudioChunksEnded    Finalize the audio stream (omitted in case of errors).
//   EndRecognition      Mandatory at end of SR (even on errors).
// No delegate callbacks are allowed before StartRecognition or after
// EndRecognition. If a recognition was started, the caller can free the
// SpeechRecognitionEngine only after calling EndRecognition.
class SpeechRecognitionEngine {
 public:
  // Interface for receiving callbacks from this object.
  class Delegate {
   public:
    // Called whenever a result is retrieved. It might be issued several times,
    // (e.g., in the case of continuous speech recognition engine
    // implementations).
    virtual void OnSpeechRecognitionEngineResults(
        const SpeechRecognitionResults& results) = 0;

    // Called to report an engine error; |error| carries its description.
    virtual void OnSpeechRecognitionEngineError(
        const SpeechRecognitionError& error) = 0;

   protected:
    // Protected so that a delegate cannot be deleted through this interface.
    virtual ~Delegate() {}
  };

  // Remote engine configuration. The constructor and destructor are defined
  // out of line.
  struct CONTENT_EXPORT Config {
    Config();
    ~Config();

    std::string language;
    SpeechRecognitionGrammarArray grammars;
    bool filter_profanities;
    bool continuous;
    bool interim_results;
    uint32 max_hypotheses;
    std::string hardware_info;
    std::string origin_url;
    int audio_sample_rate;
    int audio_num_bits_per_sample;
  };

  // The engine starts with no delegate attached. Initializing |delegate_|
  // here guarantees that delegate() never returns an indeterminate pointer
  // when it is read before set_delegate() has been called.
  SpeechRecognitionEngine() : delegate_(NULL) {}
  virtual ~SpeechRecognitionEngine() {}

  // Set/change the recognition engine configuration. It is not allowed to call
  // this function while a recognition is ongoing.
  virtual void SetConfig(const Config& config) = 0;

  // Called when the speech recognition begins, before any TakeAudioChunk call.
  virtual void StartRecognition() = 0;

  // End any recognition activity and don't make any further callback.
  // Must be always called to close the corresponding StartRecognition call,
  // even in case of errors.
  // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
  virtual void EndRecognition() = 0;

  // Push a chunk of uncompressed audio data, where the chunk length agrees with
  // GetDesiredAudioChunkDurationMs().
  virtual void TakeAudioChunk(const AudioChunk& data) = 0;

  // Notifies the engine that audio capture has completed and no more chunks
  // will be pushed. The engine, however, can still provide further results
  // using the audio chunks collected so far.
  virtual void AudioChunksEnded() = 0;

  // Checks whether recognition of pushed audio data is pending.
  virtual bool IsRecognitionPending() const = 0;

  // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
  virtual int GetDesiredAudioChunkDurationMs() const = 0;

  // set_delegate detached from constructor for lazy dependency injection.
  void set_delegate(Delegate* delegate) { delegate_ = delegate; }

 protected:
  // Accessor for subclasses. Returns NULL until set_delegate() has been called
  // with a non-NULL delegate.
  Delegate* delegate() const { return delegate_; }

 private:
  // Receiver of engine callbacks. This class only stores the pointer; it never
  // deletes it.
  Delegate* delegate_;
};
// Namespace-level aliases for the nested types. They work around an issue in
// certain versions of Visual Studio, which get confused between multiple
// Delegate classes and report error C2500.
typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;
typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
} // namespace content
#endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_