# Provenance (file-viewer residue kept as a comment so the module parses):
# author Brasd99 · "initial commit" · 2afbf1e · 2.84 kB
from TTS.api import TTS
from bs4 import BeautifulSoup
import requests
import streamlit as st
import tempfile
import os
import json
import datetime
# Application settings are read once, at import time, from config.json in the
# current working directory. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's locale default encoding.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# A missing key raises KeyError right here at startup.
APP_NAME = config['APP_NAME']  # page title and main heading
APP_LOGO = config['APP_LOGO']  # image handed to st.image
APP_DESCRIPTION = config['APP_DESCRIPTION']  # markdown shown under the logo
LANGUAGES_URL = config['LANGUAGES_URL']  # HTML page listing ISO codes and names
def contains_only_ascii(input_string):
    """Return True when every character has a code point below 128.

    An empty string contains no offending character and therefore yields True.
    """
    for character in input_string:
        if ord(character) >= 128:
            return False
    return True
def get_iso_languages():
    """Download the language table and map language names to ISO codes.

    Fetches ``LANGUAGES_URL``, parses the HTML and reads every ``<p>`` element
    after the first one. A paragraph is used only when its text splits on
    whitespace into exactly two tokens (``<iso_code> <language_name>``) and
    the language name is pure ASCII.

    Returns:
        dict: language name -> ISO code.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    # Without a timeout requests waits indefinitely, which would freeze the
    # app on a stalled connection.
    response = requests.get(LANGUAGES_URL, timeout=30)
    # Fail loudly instead of parsing an error page into an empty table.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    p_tags = soup.find_all('p')

    iso_language_dict = {}
    for p_tag in p_tags[1:]:  # Skipping the first <p> which contains the header
        parts = p_tag.get_text().split()
        # NOTE(review): names made of several words produce more than two
        # tokens and are skipped here - confirm that this is intended.
        if len(parts) != 2:
            continue
        iso_code, language_name = parts
        if contains_only_ascii(language_name):
            iso_language_dict[language_name] = iso_code
    return iso_language_dict
def create_temp_file(input_wav):
    """Copy an uploaded file-like object into a named temporary file.

    Args:
        input_wav: object with a ``read()`` method returning bytes.

    Returns:
        The still-open temporary file object. It was created with
        ``delete=False``, so the caller must close it and delete
        ``temp_file.name`` when done (see ``remove_temp_file``).

    Raises:
        Any exception raised while reading ``input_wav`` or writing the file;
        the temporary file is removed before the exception propagates.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        temp_file.write(input_wav.read())
        # The file is later re-opened by name while this handle is still
        # open; without a flush the data may still sit in the write buffer
        # and the reader would see an empty or truncated file.
        temp_file.flush()
    except Exception:
        # Do not leave an orphaned file behind when the copy fails.
        temp_file.close()
        os.remove(temp_file.name)
        raise
    return temp_file
def remove_temp_file(temp_file):
    """Close a temporary file object and delete the file it refers to.

    Args:
        temp_file: object exposing ``close()`` and a ``name`` attribute
            holding the path on disk.
    """
    # Close first so the handle is released before the path is unlinked.
    temp_file.close()
    path_on_disk = temp_file.name
    os.remove(path_on_disk)
def update_progress(percent, text):
    """Show the current processing stage in the page.

    Relies on the module-level ``progress_bar`` and ``status_text`` widgets,
    which the script creates once both files have been uploaded.

    Args:
        percent: value forwarded to ``progress_bar.progress``.
        text: message forwarded to ``status_text.text``.
    """
    # Advance the bar first, then replace the status line.
    progress_bar.progress(percent)
    status_text.text(text)
# Build the language selector from the downloaded table (name -> ISO code).
iso_languages = get_iso_languages()
languages = list(iso_languages.keys())

# Page header. NOTE(review): Streamlit expects set_page_config to run before
# other st.* commands, so keep it ahead of the calls below.
st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

# User inputs: target language plus the two recordings.
language = st.selectbox('Select a language', languages)
input_wav = st.file_uploader("Upload a WAV file with your voice", type=["wav"])
clone_wav = st.file_uploader("Upload a WAV file with voice to clone", type=["wav"])

# Conversion runs only once both recordings have been uploaded.
if input_wav and clone_wav:
    # Widgets used by update_progress().
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Output name is unique per second, e.g. recording_2024-01-31_235959.wav.
    # NOTE(review): the file is written to the working directory and never
    # deleted; confirm whether keeping it is intended.
    current_datetime = datetime.datetime.now()
    formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
    output_filename = f"recording_{formatted_datetime}.wav"

    # Resolve the code before creating temp files so a failed lookup leaves
    # nothing behind on disk.
    iso_code = iso_languages[language]
    print(f'Language: {language}')

    # The TTS API takes file paths, so the uploads are spooled to disk. The
    # try/finally blocks make sure the temp files are removed even when model
    # loading or conversion raises.
    temp_input_file = create_temp_file(input_wav)
    try:
        temp_clone_file = create_temp_file(clone_wav)
        try:
            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")

            update_progress(50, 'Generating audio...')
            api.voice_conversion_to_file(
                source_wav=temp_input_file.name,
                target_wav=temp_clone_file.name,
                file_path=output_filename
            )
        finally:
            remove_temp_file(temp_clone_file)
    finally:
        remove_temp_file(temp_input_file)

    # Read the result through a context manager so the handle is closed.
    with open(output_filename, 'rb') as audio_file:
        audio_bytes = audio_file.read()

    update_progress(100, 'Audio generated successfully!')

    st.audio(audio_bytes, format='audio/wav')
    st.download_button('Download WAV', data=audio_bytes, file_name='output.wav')