Spaces:

coraKong
/

voice-cloning-demo

Build error

File size: 2,062 Bytes

c962c9a
 
 
 
3e0d7e1
 
e5efebc
 
e228397
c962c9a
392fff0
 
 
3e0d7e1
 
c2c3684
 
 
 
6045b6b
a6f1a9c
 
 
 
e228397
a6f1a9c
 
 
e228397
4c5bfad
b04ebb9
3e0d7e1
b04ebb9
3e0d7e1
02d8bcc
c962c9a
b04ebb9
392fff0
 
e228397
c962c9a
 
 
 
a25c8ec

import os
import tempfile

import gradio as gr
from TTS.api import TTS

# Load every synthesis model once at import time; all of them run on CPU.

# Multilingual YourTTS model.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)

# Mandarin Chinese model.
zh_tts = TTS(
    model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST",
    progress_bar=False,
    gpu=False,
)

# German model; progress_bar is left at the library default here.
# Alternative German model: "tts_models/de/thorsten/tacotron2-DCA".
de_tts = TTS(
    model_name="tts_models/de/thorsten/vits",
    gpu=False,
)

# Spanish model.
es_tts = TTS(
    model_name="tts_models/es/mai/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)

def text_to_speech(text: str, speaker_wav, speaker_wav_file, language: str):
    """Synthesize ``text`` into a WAV file and return the path to that file.

    Args:
        text: The text to speak.
        speaker_wav: Path to a reference recording of the voice to clone
            (first audio input), or a falsy value when none was given.
        speaker_wav_file: Path to a second reference recording (second audio
            input); used only when ``speaker_wav`` is missing.
        language: Language code. ``"zh-CN"``, ``"de"`` and ``"es"`` are
            routed to their dedicated models; any other value is passed to
            the multilingual model as its ``language`` argument.

    Returns:
        Path of the newly written WAV file. Each call writes to its own
        temporary file, so simultaneous requests cannot overwrite one
        another's output.
    """
    # Prefer the first recording; fall back to the second one.
    if speaker_wav_file and not speaker_wav:
        speaker_wav = speaker_wav_file

    # Reserve a unique output path for this request. The handle is closed
    # immediately so the TTS library can reopen the path by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        file_path = handle.name

    # The dedicated models are called without a reference voice, so any
    # recording supplied for these languages is ignored.
    single_language_models = {"zh-CN": zh_tts, "de": de_tts, "es": es_tts}
    model = single_language_models.get(language)

    try:
        if model is not None:
            model.tts_to_file(text, file_path=file_path)
        elif speaker_wav is not None:
            # Clone the voice from the reference recording.
            tts.tts_to_file(text, speaker_wav=speaker_wav, language=language, file_path=file_path)
        else:
            # No reference recording: use the model's first built-in speaker.
            tts.tts_to_file(text, speaker=tts.speakers[0], language=language, file_path=file_path)
    except Exception:
        # Do not leave a placeholder file behind when synthesis fails.
        if os.path.exists(file_path):
            os.remove(file_path)
        raise
    return file_path

# Input widgets, listed in the positional order of text_to_speech's parameters.
inputs = [
    # text
    gr.Textbox(
        label="Input the text",
        value="",
        max_lines=3,
    ),
    # speaker_wav: recorded from the microphone
    gr.Audio(
        label="Voice to clone",
        source="microphone",
        type="filepath",
    ),
    # speaker_wav_file: second audio input, no explicit source given
    gr.Audio(
        label="Voice to clone",
        type="filepath",
    ),
    # language
    gr.Radio(
        label="Language",
        choices=["en", "zh-CN", "fr-fr", "de", "es"],
        value="en",
    ),
]

# Single audio widget that receives the value returned by text_to_speech.
outputs = gr.Audio(label="Output")

# Wire the handler to the widgets and start the app.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=inputs,
    outputs=outputs,
)

demo.launch()