"""Voice translator: transcribe Spanish speech, translate it to four
languages, and synthesize each translation to an MP3 with ElevenLabs.

requirements.txt:
    gradio
    openai-whisper
    translate
    python-dotenv
    elevenlabs
"""
import os

import gradio as gr
import whisper
from translate import Translator
from dotenv import dotenv_values
from elevenlabs.client import ElevenLabs
from elevenlabs import VoiceSettings

# .env configuration — prefer a key from .env or the process environment so the
# secret never has to live in source control; the literal is a last-resort
# placeholder matching the original default.
_config = dotenv_values(".env")
ELEVENLABS_API_KEY = (
    _config.get("ELEVENLABS_API_KEY")
    or os.environ.get("ELEVENLABS_API_KEY")
    or "MY_API_KEY"
)

# Target languages: ISO 639-1 code -> Spanish label used in logs and the UI.
TARGET_LANGUAGES = {
    "en": "Inglés",
    "it": "Italiano",
    "fr": "Francés",
    "ja": "Japonés",
}

# Whisper model cache — loading "base" takes seconds, so do it once per
# process instead of once per request.
_whisper_model = None


def _get_whisper_model():
    """Lazily load and cache the Whisper 'base' model."""
    global _whisper_model
    if _whisper_model is None:
        _whisper_model = whisper.load_model("base")
    return _whisper_model


def translator(audio_file):
    """Transcribe Spanish audio, translate it, and return one MP3 path per
    target language (English, Italian, French, Japanese — in that order).

    Args:
        audio_file: path to the recorded audio (Gradio ``type="filepath"``).

    Returns:
        Tuple of four saved MP3 file paths, one per target language.

    Raises:
        gr.Error: if transcription, translation, or synthesis fails.
    """
    # 1. Transcribe the audio.
    # Whisper: https://github.com/openai/whisper
    # Online API alternative: https://www.assemblyai.com
    try:
        model = _get_whisper_model()
        result = model.transcribe(audio_file, language="Spanish", fp16=False)
        transcription = result["text"]
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error transcribiendo el texto: {str(e)}")

    print(f"Texto original: {transcription}")

    # 2. Translate the text to each target language.
    # translate-python: https://github.com/terryyin/translate-python
    translations = {}
    try:
        for code in TARGET_LANGUAGES:
            translations[code] = Translator(
                from_lang="es", to_lang=code).translate(transcription)
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error traduciendo el texto: {str(e)}")

    for code, label in TARGET_LANGUAGES.items():
        print(f"Texto traducido a {label}: {translations[code]}")

    # 3. Synthesize the translated audio.
    # ElevenLabs: https://elevenlabs.io/docs/api-reference/getting-started
    return tuple(
        text_to_speach(translations[code], code) for code in TARGET_LANGUAGES)


def text_to_speach(text: str, language: str) -> str:
    """Synthesize *text* with ElevenLabs and save it as ``<language>.mp3``.

    Args:
        text: text to convert to speech.
        language: ISO code used only to name the output file.

    Returns:
        Path of the saved MP3 file.

    Raises:
        gr.Error: if the ElevenLabs request or the file write fails.
    """
    try:
        client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
        response = client.text_to_speech.convert(
            voice_id="pNInz6obpgDQGcFmaJgB",  # "Adam" preset voice
            optimize_streaming_latency="0",
            output_format="mp3_22050_32",
            text=text,
            model_id="eleven_turbo_v2",
            voice_settings=VoiceSettings(
                stability=0.0,
                similarity_boost=0.0,
                style=0.0,
                use_speaker_boost=True,
            ),
        )

        save_file_path = f"{language}.mp3"
        with open(save_file_path, "wb") as f:
            for chunk in response:
                if chunk:
                    f.write(chunk)
    except Exception as e:
        raise gr.Error(
            f"Se ha producido un error creando el audio: {str(e)}")

    return save_file_path


web = gr.Interface(
    fn=translator,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Español"
    ),
    outputs=[
        gr.Audio(label="Inglés"),
        gr.Audio(label="Italiano"),
        gr.Audio(label="Francés"),
        gr.Audio(label="Japonés")
    ],
    title="Traductor de voz",
    description="Traductor de voz con IA a varios idiomas"
)

# Guard the launch so importing this module (e.g. for tests) does not start
# the web server.
if __name__ == "__main__":
    web.launch()