import gradio as gr import speech_recognition as sr from Levenshtein import ratio import tempfile import numpy as np import soundfile as sf def transcribe_audio(file_info): r = sr.Recognizer() with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as tmpfile: sf.write(tmpfile.name, data=file_info[1], samplerate=44100, format='WAV') tmpfile.seek(0) with sr.AudioFile(tmpfile.name) as source: audio_data = r.record(source) try: text = r.recognize_google(audio_data) return text except sr.UnknownValueError: return "Could not understand audio" except sr.RequestError as e: return f"Could not request results; {e}" def pronunciation_correction(expected_text, file_info): print(f"Expected text: {expected_text}") user_spoken_text = transcribe_audio(file_info) print(f"User spoken text: {user_spoken_text}") similarity = ratio(expected_text.lower(), user_spoken_text.lower()) description = f"{similarity:.2f}" if similarity >= 0.9: feedback = "Excellent pronunciation!" elif similarity >= 0.7: feedback = "Good pronunciation!" elif similarity >= 0.5: feedback = "Needs improvement." else: feedback = "Poor pronunciation, try to focus more on clarity." print(f"Similarity: {similarity}, Feedback: {feedback}") return feedback, description def validate_sentence(sentence): if not sentence.strip(): return "Please enter a sentence." return sentence with gr.Blocks() as app: with gr.Row(): sentence_input = gr.Textbox(label="Enter Your Sentence Here") validated_sentence = gr.Textbox(label="Valid Sentence", interactive=False) audio_input = gr.Audio(label="Upload or Record Audio File", type="numpy") check_pronunciation_button = gr.Button("Check Pronunciation") pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback") pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)") sentence_input.change( validate_sentence, inputs=sentence_input, outputs=validated_sentence ) check_pronunciation_button.click( pronunciation_correction, inputs=[validated_sentence, audio_input], outputs=[pronunciation_feedback, pronunciation_score] ) app.launch(debug=True)