# homeassistant-config/custom_components/watson_tts/tts.py
"""Support for IBM Watson TTS integration."""
import logging
# Third-party: IBM Watson SDK, voluptuous, and Home Assistant helpers.
from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import voluptuous as vol
from homeassistant.components.tts import PLATFORM_SCHEMA, Provider
import homeassistant.helpers.config_validation as cv
_LOGGER = logging.getLogger(__name__)

# Configuration keys for connecting to the Watson service.
CONF_URL = "watson_url"
CONF_APIKEY = "watson_apikey"
ATTR_CREDENTIALS = "credentials"

# Service endpoint used when the configuration does not set CONF_URL.
# NOTE(review): this is a watsonplatform.net host -- confirm it is still the
# endpoint IBM serves for the account in use.
DEFAULT_URL = "https://stream.watsonplatform.net/text-to-speech/api"

# Configuration keys for synthesis settings.
CONF_VOICE = "voice"
CONF_OUTPUT_FORMAT = "output_format"
CONF_OUTPUT_AUDIO_RATE = "output_audio_rate"

CONF_TEXT_TYPE = "text"
# List from https://tinyurl.com/watson-tts-docs
# Each identifier starts with a five-character locale (e.g. "en-US"), followed
# by an underscore and the voice name.
SUPPORTED_VOICES = [
    "de-DE_BirgitVoice",
    "de-DE_BirgitV2Voice",
    "de-DE_BirgitV3Voice",
    "de-DE_DieterVoice",
    "de-DE_DieterV2Voice",
    "de-DE_DieterV3Voice",
    "en-GB_KateVoice",
    "en-GB_KateV3Voice",
    "en-US_AllisonVoice",
    "en-US_AllisonV2Voice",
    "en-US_AllisonV3Voice",
    "en-US_LisaVoice",
    "en-US_LisaV2Voice",
    "en-US_LisaV3Voice",
    "en-US_MichaelVoice",
    "en-US_MichaelV2Voice",
    "en-US_MichaelV3Voice",
    "es-ES_EnriqueVoice",
    "es-ES_EnriqueV3Voice",
    "es-ES_LauraVoice",
    "es-ES_LauraV3Voice",
    "es-LA_SofiaVoice",
    "es-LA_SofiaV3Voice",
    "es-US_SofiaVoice",
    "es-US_SofiaV3Voice",
    "fr-FR_ReneeVoice",
    "fr-FR_ReneeV3Voice",
    "it-IT_FrancescaVoice",
    "it-IT_FrancescaV2Voice",
    "it-IT_FrancescaV3Voice",
    "ja-JP_EmiVoice",
    "pt-BR_IsabelaVoice",
    "pt-BR_IsabelaV3Voice",
]
# Content types accepted for the CONF_OUTPUT_FORMAT configuration option.
SUPPORTED_OUTPUT_FORMATS = [
    "audio/flac",
    "audio/mp3",
    "audio/mpeg",
    "audio/ogg",
    "audio/ogg;codecs=opus",
    "audio/ogg;codecs=vorbis",
    "audio/wav",
]
# Maps each supported content type to the file extension reported alongside
# the synthesized audio.
CONTENT_TYPE_EXTENSIONS = {
    "audio/flac": "flac",
    "audio/mp3": "mp3",
    "audio/mpeg": "mp3",
    "audio/ogg": "ogg",
    "audio/ogg;codecs=opus": "ogg",
    "audio/ogg;codecs=vorbis": "ogg",
    "audio/wav": "wav",
}
# Defaults applied when the configuration omits the voice / output format.
DEFAULT_VOICE = "en-US_AllisonVoice"
DEFAULT_OUTPUT_FORMAT = "audio/mp3"
# Extend the base TTS platform schema with the Watson-specific options.
PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        # Service endpoint; falls back to DEFAULT_URL.
        vol.Optional(CONF_URL, default=DEFAULT_URL): cv.string,
        # API key is the only mandatory option.
        vol.Required(CONF_APIKEY): cv.string,
        # Voice must be one of the identifiers in SUPPORTED_VOICES.
        vol.Optional(CONF_VOICE, default=DEFAULT_VOICE): vol.In(SUPPORTED_VOICES),
        # Output content type must be one of SUPPORTED_OUTPUT_FORMATS.
        vol.Optional(CONF_OUTPUT_FORMAT, default=DEFAULT_OUTPUT_FORMAT): vol.In(
            SUPPORTED_OUTPUT_FORMATS
        ),
        # Optional sampling rate; no default, so the key may be absent.
        vol.Optional(CONF_OUTPUT_AUDIO_RATE): cv.positive_int,
    }
)
def get_engine(hass, config, discovery_info=None):
    """Set up IBM Watson TTS component.

    Builds a ``TextToSpeechV1`` client authenticated with the configured API
    key, points it at the configured service URL, and wraps it in a
    ``WatsonTTSProvider``.

    Args:
        hass: Not used by this function.
        config: Platform configuration mapping. ``CONF_APIKEY``, ``CONF_URL``,
            ``CONF_VOICE`` and ``CONF_OUTPUT_FORMAT`` are read as required
            keys; ``CONF_OUTPUT_AUDIO_RATE`` is optional.
        discovery_info: Not used by this function.

    Returns:
        A ``WatsonTTSProvider`` bound to the configured service.
    """
    authenticator = IAMAuthenticator(config[CONF_APIKEY])
    service = TextToSpeechV1(authenticator)
    service.set_service_url(config[CONF_URL])
    # Sent with every request made through this client.
    service.set_default_headers({"x-watson-learning-opt-out": "true"})

    # The first five characters of a voice identifier are its locale.
    # Sorting the de-duplicated set gives a stable order across restarts,
    # because plain set iteration order is not deterministic.
    supported_languages = sorted({voice[:5] for voice in SUPPORTED_VOICES})

    return WatsonTTSProvider(
        service,
        supported_languages,
        config[CONF_VOICE],
        config[CONF_OUTPUT_FORMAT],
        config.get(CONF_OUTPUT_AUDIO_RATE),
    )
class WatsonTTSProvider(Provider):
    """IBM Watson TTS api provider."""

    def __init__(
        self,
        service,
        supported_languages,
        default_voice,
        output_format,
        output_audio_rate,
    ):
        """Initialize Watson TTS provider.

        Args:
            service: Client object whose ``synthesize`` method is called to
                produce audio.
            supported_languages: Language codes reported by
                ``supported_languages``.
            default_voice: Voice identifier used when a request does not name
                one; its first five characters become the default language.
            output_format: Content type requested from the service; must be a
                key of ``CONTENT_TYPE_EXTENSIONS``.
            output_audio_rate: Optional sampling rate appended to the
                requested content type; falsy values disable it.
        """
        self.service = service
        self.supported_langs = supported_languages
        self.default_lang = default_voice[:5]
        self.default_voice = default_voice
        self.output_format = output_format
        self.output_audio_rate = output_audio_rate
        self.name = "Watson TTS"

    @property
    def supported_languages(self):
        """Return a list of supported languages."""
        return self.supported_langs

    @property
    def default_language(self):
        """Return the default language."""
        return self.default_lang

    @property
    def default_options(self):
        """Return dict include default options."""
        return {CONF_VOICE: self.default_voice}

    @property
    def supported_options(self):
        """Return a list of supported options."""
        return [CONF_VOICE]

    def get_tts_audio(self, message, language=None, options=None):
        """Request TTS file from Watson TTS.

        Args:
            message: Text to synthesize.
            language: Accepted for interface compatibility; not used here.
            options: Optional mapping of per-call options. A ``CONF_VOICE``
                entry selects the voice for this request.

        Returns:
            A ``(extension, audio_bytes)`` tuple, where ``extension`` is
            looked up from the configured output format.
        """
        # supported_options advertises CONF_VOICE, so honor a per-call voice
        # and fall back to the configured default when none is supplied.
        voice = (options or {}).get(CONF_VOICE) or self.default_voice

        accept = self.output_format
        if self.output_audio_rate:
            accept = f"{accept};rate={self.output_audio_rate}"

        response = self.service.synthesize(
            message, accept=accept, voice=voice
        ).get_result()

        # The extension is keyed by the base format, without the rate suffix.
        return (CONTENT_TYPE_EXTENSIONS[self.output_format], response.content)