File size: 2,220 Bytes
518d206
41f4425
4124ee1
41f4425
9d4a77e
41f4425
518d206
41f4425
 
 
9ef4232
41f4425
 
 
 
 
 
 
 
 
 
518d206
a8c79fb
41f4425
a8c79fb
4124ee1
74e03af
4124ee1
 
 
 
 
 
 
 
74e03af
3695d99
518d206
9723fb0
 
 
 
9ef4232
9d4a77e
9ef4232
9723fb0
 
9ef4232
 
 
 
 
9723fb0
 
9ef4232
9723fb0
9ef4232
9723fb0
9ef4232
 
9723fb0
9ef4232
 
9d4a77e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf

def transcribe_audio(file_info):
    """Transcribe an audio clip to text with ``Recognizer.recognize_google``.

    Args:
        file_info: ``(sample_rate, samples)`` pair as delivered by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio
            was supplied.

    Returns:
        The recognized text, or a human-readable message when there is no
        audio, the speech could not be understood, or the recognition
        request failed.
    """
    # Guard first: without audio there is nothing to index into below.
    if file_info is None:
        return "No audio provided"

    import io  # local import: only this function needs an in-memory buffer

    # Use the clip's own sample rate. Writing every clip as 44100 Hz shifts
    # the pitch/speed of audio captured at any other rate and hurts
    # recognition accuracy.
    sample_rate, samples = int(file_info[0]), file_info[1]

    # Encode to WAV in memory rather than through a named temp file:
    # reopening a still-open NamedTemporaryFile by name fails on Windows.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format="WAV")
    wav_buffer.seek(0)  # rewind so the reader starts at the WAV header

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_buffer) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(expected_text, file_info):
    """Score spoken audio against the expected sentence.

    The audio is transcribed with ``transcribe_audio`` and compared,
    case-insensitively, to ``expected_text`` using the Levenshtein ``ratio``.

    Args:
        expected_text: The sentence the speaker was supposed to say.
        file_info: Audio value forwarded unchanged to ``transcribe_audio``.

    Returns:
        A ``(feedback, score)`` tuple: a feedback message chosen from the
        similarity band, and the similarity formatted to two decimals as a
        string.
    """
    spoken = transcribe_audio(file_info)
    score = ratio(expected_text.lower(), spoken.lower())

    # Bands are checked from the highest floor down; the first match wins.
    bands = (
        (0.9, "Excellent pronunciation!"),
        (0.7, "Good pronunciation!"),
        (0.5, "Needs improvement."),
    )
    verdict = next(
        (message for floor, message in bands if score >= floor),
        "Poor pronunciation, try to focus more on clarity.",
    )

    return verdict, f"{score:.2f}"

def validate_sentence(sentence):
    """Return ``sentence`` unchanged, or a prompt when it is blank.

    A sentence counts as blank when it is empty or whitespace-only.
    Non-blank input is returned exactly as given (not stripped).
    """
    has_content = bool(sentence.strip())
    return sentence if has_content else "Please enter a sentence."

# Build the Gradio UI: a sentence box, a read-only validated copy of it,
# an audio input, a trigger button, and feedback/score outputs.
with gr.Blocks() as app:
    with gr.Row():
        sentence_input = gr.Textbox(label="Enter Your Sentence Here")
    # Non-interactive box filled by validate_sentence; this box (not the raw
    # input) supplies the expected text to pronunciation_correction.
    validated_sentence = gr.Textbox(label="Valid Sentence", interactive=False)
    # type="numpy": transcribe_audio indexes this value and reads the
    # samples from position 1.
    audio_input = gr.Audio(label="Upload Audio File", type="numpy")
    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    # NOTE: pronunciation_correction returns the score as a two-decimal
    # formatted string, which is what this Number component receives.
    pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")

    # Re-validate on every change of the sentence box and mirror the result
    # (or the "please enter" prompt) into the read-only box.
    sentence_input.change(
        validate_sentence,
        inputs=sentence_input,
        outputs=validated_sentence
    )

    # On click, score the audio against the validated sentence and show
    # the feedback message and the similarity score.
    check_pronunciation_button.click(
        pronunciation_correction,
        inputs=[validated_sentence, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score]
    )

# Launched at module level (no __main__ guard), so importing this file
# starts the app.
app.launch(debug=True)