"""Gradio app that scores how closely a spoken recording matches a sentence.

The user types the sentence they intend to say and uploads a recording.  The
recording is transcribed with Google's web speech API (through the
``speech_recognition`` package) and compared to the sentence with a
Levenshtein similarity ratio.
"""
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import speech_recognition as sr
from Levenshtein import ratio

# Sample rate used only when the caller hands over a bare sample array with
# no rate attached (the behaviour of the original implementation).
DEFAULT_SAMPLE_RATE = 44100

# (minimum similarity, feedback) pairs, checked from best to worst.
_FEEDBACK_LEVELS = (
    (0.9, "Excellent pronunciation!"),
    (0.7, "Good pronunciation!"),
    (0.5, "Needs improvement."),
)
_LOWEST_FEEDBACK = "Poor pronunciation, try to focus more on clarity."


def _split_audio(file_info):
    """Return ``(sample_rate, samples)`` for a Gradio audio value.

    Gradio's numpy audio input yields a ``(sample_rate, samples)`` tuple.  A
    bare array of samples is also accepted for backward compatibility and is
    assumed to be recorded at ``DEFAULT_SAMPLE_RATE``.
    """
    is_rate_and_samples = (
        isinstance(file_info, tuple)
        and len(file_info) == 2
        and not np.isscalar(file_info[1])
    )
    if is_rate_and_samples:
        sample_rate, samples = file_info
        return int(sample_rate), np.asarray(samples)
    return DEFAULT_SAMPLE_RATE, np.asarray(file_info)


def _recognize(file_info):
    """Transcribe audio and return ``(transcript, error_message)``.

    Exactly one element of the pair is ``None``: ``transcript`` is set on
    success, ``error_message`` when there was no audio, the speech could not
    be understood, or the recognition service could not be reached.
    """
    if file_info is None:
        return None, "No audio provided"

    sample_rate, samples = _split_audio(file_info)
    recognizer = sr.Recognizer()

    # Write into a temporary directory rather than a NamedTemporaryFile: a
    # still-open NamedTemporaryFile cannot be reopened by name on Windows.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "recording.wav")
        sf.write(wav_path, samples, sample_rate, format="WAV")
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data), None
    except sr.UnknownValueError:
        return None, "Could not understand audio"
    except sr.RequestError as e:
        return None, f"Could not request results; {e}"


def transcribe_audio(file_info):
    """Transcribe an uploaded recording to text.

    Args:
        file_info: Either the ``(sample_rate, samples)`` tuple produced by
            Gradio's audio input, or a bare array of samples (assumed to be
            at ``DEFAULT_SAMPLE_RATE``).

    Returns:
        The recognised text, or a human-readable error message when the
        audio is missing, unintelligible, or the service request fails.
    """
    text, error = _recognize(file_info)
    return text if error is None else error


def pronunciation_correction(expected_text, file_info):
    """Score a recording against the sentence the speaker meant to say.

    Args:
        expected_text: The sentence the user intended to pronounce.
        file_info: Audio value as accepted by ``transcribe_audio``.

    Returns:
        A ``(feedback, description)`` pair of strings.  ``description`` is
        the similarity ratio formatted to two decimals.  When transcription
        fails, ``feedback`` carries the error message and ``description`` is
        ``"N/A"`` so that an error string is never scored as if it were
        the user's speech.
    """
    user_spoken_text, error = _recognize(file_info)
    if error is not None:
        return error, "N/A"

    # Textbox values may be None when the field was never touched.
    similarity = ratio((expected_text or "").lower(), user_spoken_text.lower())
    description = f"{similarity:.2f}"

    for threshold, feedback in _FEEDBACK_LEVELS:
        if similarity >= threshold:
            return feedback, description
    return _LOWEST_FEEDBACK, description


def get_sentence():
    """Build the textbox in which the user types the expected sentence."""
    return gr.inputs.Textbox(label="Enter a Sentence")


# NOTE(review): the ``gr.inputs`` namespace is deprecated in Gradio 3 and
# removed in Gradio 4; switch to ``gr.Textbox`` / ``gr.Audio`` once the
# Gradio version this app is pinned to is confirmed.
sentence_input = get_sentence()
audio_input = gr.inputs.Audio(label="Upload Audio File")

gr.Interface(
    pronunciation_correction,
    inputs=[sentence_input, audio_input],
    outputs=["text", "text"],
).launch()