ignore third party
This commit is contained in:
@ -1,4 +0,0 @@
|
||||
"""This module contains the speech recognition and speech synthesis functions."""
|
||||
from autogpt.speech.say import say_text
|
||||
|
||||
# Public API of the speech package: only the text-to-speech entry point is exported.
__all__ = ["say_text"]
|
||||
@ -1,50 +0,0 @@
|
||||
"""Base class for all voice classes."""
|
||||
import abc
|
||||
from threading import Lock
|
||||
|
||||
from autogpt.config import AbstractSingleton
|
||||
|
||||
|
||||
class VoiceBase(AbstractSingleton):
    """Common interface shared by every text-to-speech backend.

    Subclasses implement ``_setup`` (one-time configuration) and ``_speech``
    (the actual synthesis and playback); callers use ``say``.
    """

    def __init__(self):
        """Create the shared state, then run the backend-specific setup."""
        # Held for the duration of every ``say`` call on this instance.
        self._mutex = Lock()
        # Backend configuration; subclasses fill in what they need in _setup().
        self._voices = []
        self._api_key = None
        self._headers = None
        self._url = None
        self._setup()

    def say(self, text: str, voice_index: int = 0) -> bool:
        """Speak the given text while holding the instance lock.

        Args:
            text (str): The text to say.
            voice_index (int): The index of the voice to use.

        Returns:
            bool: The value returned by the backend's ``_speech``.
        """
        with self._mutex:
            spoken = self._speech(text, voice_index)
        return spoken

    @abc.abstractmethod
    def _setup(self) -> None:
        """Set up the voices, API key, etc. for the concrete backend."""

    @abc.abstractmethod
    def _speech(self, text: str, voice_index: int = 0) -> bool:
        """Synthesize and play the given text.

        Args:
            text (str): The text to play.
            voice_index (int): The index of the voice to use.
        """
|
||||
@ -1,43 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import requests
|
||||
from playsound import playsound
|
||||
|
||||
from autogpt.speech.base import VoiceBase
|
||||
|
||||
|
||||
class BrianSpeech(VoiceBase):
    """Brian speech module for autogpt (StreamElements text-to-speech API)."""

    def _setup(self) -> None:
        """No setup is required for this backend."""

    def _speech(self, text: str, _: int = 0) -> bool:
        """Speak text using Brian with the streamelements API.

        Args:
            text (str): The text to speak.
            _ (int): Voice index; ignored by this backend.

        Returns:
            bool: True if the request was successful, False otherwise.
        """
        # Pass the text through ``params`` so requests percent-encodes it.
        # Interpolating it straight into the URL lets characters such as
        # "&", "#", "+" or "%" corrupt or truncate the query string.
        response = requests.get(
            "https://api.streamelements.com/kappa/v2/speech",
            params={"voice": "Brian", "text": text},
        )

        if response.status_code != 200:
            logging.error(
                "Request failed with status code: %s, response content: %s",
                response.status_code,
                response.content,
            )
            return False

        audio_path = "speech.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        try:
            playsound(audio_path)
        finally:
            # Remove the temporary file even when playback raises.
            os.remove(audio_path)
        return True
|
||||
@ -1,86 +0,0 @@
|
||||
"""ElevenLabs speech module"""
|
||||
import os
|
||||
|
||||
import requests
|
||||
from playsound import playsound
|
||||
|
||||
from autogpt.config import Config
|
||||
from autogpt.speech.base import VoiceBase
|
||||
|
||||
# Voice-ID values that ElevenLabsSpeech._use_custom_voice treats as "not configured".
PLACEHOLDERS = {"your-voice-id"}
|
||||
|
||||
|
||||
class ElevenLabsSpeech(VoiceBase):
    """ElevenLabs speech class"""

    def _setup(self) -> None:
        """Set up the request headers and the two voice slots.

        Reads the API key and the two configured voice IDs from ``Config``.
        A configured value that matches a known voice name is replaced by its
        voice ID (the config object itself is updated), and any non-empty,
        non-placeholder value overrides the default voice in its slot.

        Returns:
            None: None
        """
        cfg = Config()
        # Defaults for slot 0 and slot 1 (the IDs of "Antoni" and "Bella" below).
        default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
        # Friendly voice names accepted in the configuration -> voice IDs.
        voice_options = {
            "Rachel": "21m00Tcm4TlvDq8ikWAM",
            "Domi": "AZnzlk1XvdvUeBnXmlld",
            "Bella": "EXAVITQu4vr4xnSDxMaL",
            "Antoni": "ErXwobaYiN019PkySvjV",
            "Elli": "MF3mGyEYCl7XYWbV9V6O",
            "Josh": "TxGEqnHWrfWFTfGW9XjX",
            "Arnold": "VR6AewLTigWG4xSOukaG",
            "Adam": "pNInz6obpgDQGcFmaJgB",
            "Sam": "yoZ06aMxZJJ28mfd3POQ",
        }
        self._headers = {
            "Content-Type": "application/json",
            "xi-api-key": cfg.elevenlabs_api_key,
        }
        self._voices = default_voices.copy()
        if cfg.elevenlabs_voice_1_id in voice_options:
            cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id]
        if cfg.elevenlabs_voice_2_id in voice_options:
            cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id]
        self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0)
        self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1)

    def _use_custom_voice(self, voice, voice_index) -> None:
        """Use a custom voice if provided and not a placeholder.

        Args:
            voice (str): The voice ID
            voice_index (int): The voice index

        Returns:
            None: None
        """
        # Placeholder values that should be treated as empty
        if voice and voice not in PLACEHOLDERS:
            self._voices[voice_index] = voice

    def _speech(self, text: str, voice_index: int = 0) -> bool:
        """Speak text using elevenlabs.io's API.

        Args:
            text (str): The text to speak
            voice_index (int, optional): The voice to use. Defaults to 0.

        Returns:
            bool: True if the request was successful, False otherwise
                (including when ``voice_index`` does not select a voice).
        """
        # Only two voice slots exist. Report an unknown index as a failure
        # instead of letting IndexError escape into the worker thread.
        try:
            voice_id = self._voices[voice_index]
        except IndexError:
            print("Invalid voice index:", voice_index)
            return False

        tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
        response = requests.post(tts_url, headers=self._headers, json={"text": text})

        if response.status_code != 200:
            print("Request failed with status code:", response.status_code)
            print("Response content:", response.content)
            return False

        audio_path = "speech.mpeg"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        try:
            playsound(audio_path, True)
        finally:
            # Remove the temporary file even when playback raises.
            os.remove(audio_path)
        return True
|
||||
@ -1,23 +0,0 @@
|
||||
""" GTTS Voice. """
|
||||
import os
|
||||
|
||||
import gtts
|
||||
from playsound import playsound
|
||||
|
||||
from autogpt.speech.base import VoiceBase
|
||||
|
||||
|
||||
class GTTSVoice(VoiceBase):
    """GTTS Voice."""

    def _setup(self) -> None:
        """No setup is required for this backend."""

    def _speech(self, text: str, _: int = 0) -> bool:
        """Play the given text.

        Args:
            text (str): The text to play.
            _ (int): Voice index; ignored by this backend.

        Returns:
            bool: True once the text has been played. Errors raised while
                synthesizing or playing propagate to the caller.
        """
        audio_path = "speech.mp3"
        try:
            gtts.gTTS(text).save(audio_path)
            playsound(audio_path, True)
        finally:
            # Clean up the temporary file even when saving or playback
            # raises; it may not exist if saving failed early.
            if os.path.exists(audio_path):
                os.remove(audio_path)
        return True
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
""" MacOS TTS Voice. """
|
||||
import os
|
||||
|
||||
from autogpt.speech.base import VoiceBase
|
||||
|
||||
|
||||
class MacOSTTS(VoiceBase):
    """MacOS TTS Voice."""

    def _setup(self) -> None:
        """No setup is required for this backend."""

    def _speech(self, text: str, voice_index: int = 0) -> bool:
        """Play the given text with the ``say`` command.

        Args:
            text (str): The text to play.
            voice_index (int): 0 uses the default voice, 1 uses
                "Ava (Premium)", any other value uses "Samantha".

        Returns:
            bool: True if ``say`` ran and exited with status 0, False if it
                could not be started or reported an error.
        """
        # Imported locally because this module only imports ``os`` at the top.
        import subprocess

        command = ["say"]
        if voice_index == 1:
            command += ["-v", "Ava (Premium)"]
        elif voice_index != 0:
            command += ["-v", "Samantha"]
        command.append(text)

        # An argument list (no shell) means quotes, backticks or "$(...)"
        # inside the text are spoken rather than executed as shell code.
        try:
            completed = subprocess.run(command, check=False)
        except OSError:
            # e.g. the ``say`` executable does not exist on this system.
            return False
        return completed.returncode == 0
|
||||
@ -1,46 +0,0 @@
|
||||
""" Text to speech module """
|
||||
import threading
|
||||
from threading import Semaphore
|
||||
|
||||
from autogpt.config import Config
|
||||
from autogpt.speech.brian import BrianSpeech
|
||||
from autogpt.speech.eleven_labs import ElevenLabsSpeech
|
||||
from autogpt.speech.gtts import GTTSVoice
|
||||
from autogpt.speech.macos_tts import MacOSTTS
|
||||
|
||||
CFG = Config()

# Fallback engine that say_text uses when the selected engine reports failure.
DEFAULT_VOICE_ENGINE = GTTSVoice()

# Select the primary engine: ElevenLabs when an API key is configured, then
# the macOS or Brian engines when their flag is the string "True", else gTTS.
if CFG.elevenlabs_api_key:
    VOICE_ENGINE = ElevenLabsSpeech()
elif CFG.use_mac_os_tts == "True":
    VOICE_ENGINE = MacOSTTS()
elif CFG.use_brian_tts == "True":
    VOICE_ENGINE = BrianSpeech()
else:
    VOICE_ENGINE = GTTSVoice()

# The amount of sounds to queue before blocking the main thread
QUEUE_SEMAPHORE = Semaphore(1)
|
||||
|
||||
|
||||
def say_text(text: str, voice_index: int = 0) -> None:
    """Speak the given text using the given voice index.

    The text is spoken on a background thread. The call blocks until a slot
    in ``QUEUE_SEMAPHORE`` is free, then returns once the thread has started.
    If the selected engine reports failure, the default engine speaks the
    text instead.

    Args:
        text (str): The text to speak.
        voice_index (int): The index of the voice to use. Defaults to 0.
    """

    def speak() -> None:
        try:
            success = VOICE_ENGINE.say(text, voice_index)
            if not success:
                DEFAULT_VOICE_ENGINE.say(text)
        finally:
            # Always free the slot. If an engine raised without this, the
            # semaphore would stay held and the next say_text call would
            # block forever.
            QUEUE_SEMAPHORE.release()

    QUEUE_SEMAPHORE.acquire(True)
    thread = threading.Thread(target=speak)
    thread.start()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: speak a short greeting with the default voice index.
    say_text('你好呀')
|
||||
Reference in New Issue
Block a user