import torch
import numpy as np
import gradio as gr
from transformers import pipeline

# Pick the GPU when one is visible; the HF pipeline accepts a torch.device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# VITS text-to-speech model trained on the LJSpeech corpus.
pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)


def generate_audio(text):
    """Synthesise speech for ``text``.

    Parameters
    ----------
    text : str
        The sentence to turn into speech.

    Returns
    -------
    tuple[int, np.ndarray]
        ``(sampling_rate, waveform)`` — the value format a
        ``gr.Audio(type="numpy")`` output component expects.
    """
    output = pipe(text)
    # Return the raw (rate, samples) pair rather than constructing a new
    # gr.Audio component: the previous component-valued return silently
    # overrode the declared output widget's settings (it forced
    # autoplay=False while the UI below declares autoplay=True).
    return output["sampling_rate"], output["audio"].squeeze()


css = """
#container{
    margin: 0 auto;
    max-width: 80rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""

# Gradio Blocks demo: textbox -> button -> synthesised audio.
with gr.Blocks(css=css) as demo_blocks:
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(
                label="Input Text",
                info="What sentence would you like to synthesise?",
            )
            btn = gr.Button("Generate Audio!")
        with gr.Column():
            # NOTE(review): vits-ljs (LJSpeech) is a single American female
            # voice — the label says "British"; confirm the intended wording.
            out_audio = gr.Audio(
                type="numpy",
                autoplay=True,
                label="Generated Audio - British Female Speaker",
                show_label=True,
                visible=True,
            )
    btn.click(generate_audio, [inp_text], out_audio)

demo_blocks.queue().launch()