File size: 2,400 Bytes
518d206
41f4425
4124ee1
41f4425
9d4a77e
41f4425
518d206
41f4425
 
 
9ef4232
41f4425
 
 
 
 
 
 
 
 
 
518d206
a8c79fb
c8b7959
41f4425
c8b7959
 
a8c79fb
4124ee1
c8b7959
4124ee1
 
 
 
 
 
 
 
c8b7959
 
3695d99
518d206
9723fb0
 
 
 
9ef4232
9d4a77e
9ef4232
9723fb0
 
c8b7959
9ef4232
 
 
 
9723fb0
 
9ef4232
9723fb0
9ef4232
9723fb0
9ef4232
 
9723fb0
9ef4232
 
9d4a77e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf

def transcribe_audio(file_info):
    """Transcribe recorded audio to text with Google's speech recognition.

    Args:
        file_info: A ``(sample_rate, samples)`` pair, or ``None`` when no
            audio was supplied. ``samples`` is whatever array-like
            ``soundfile.write`` accepts.

    Returns:
        The recognized text on success. Failures are reported as plain
        message strings rather than exceptions: ``"No audio provided"``,
        ``"Could not understand audio"``, or
        ``"Could not request results; <reason>"``.
    """
    import io  # local import: only this function needs an in-memory buffer

    if file_info is None:
        return "No audio provided"

    # Use the sample rate that came with the recording; assuming a fixed
    # 44100 Hz mislabels audio captured at any other rate.
    sample_rate, samples = file_info[0], file_info[1]

    recognizer = sr.Recognizer()

    # Encode to WAV in memory. This avoids reopening a still-open temporary
    # file by name, which is not portable across operating systems.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, samplerate=sample_rate, format='WAV')
    wav_buffer.seek(0)

    with sr.AudioFile(wav_buffer) as source:
        audio_data = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(expected_text, file_info):
    """Compare the user's spoken audio against the expected sentence.

    Args:
        expected_text: The sentence the user was supposed to say.
        file_info: Audio value forwarded unchanged to ``transcribe_audio``,
            or ``None`` when nothing was recorded or uploaded.

    Returns:
        A ``(feedback, score)`` tuple. ``feedback`` is a human-readable
        message and ``score`` is the similarity ratio formatted with two
        decimals (``"0.00"`` to ``"1.00"``). When a score cannot be
        computed (missing sentence, missing audio, failed transcription)
        the feedback explains why and the score is ``"0.00"``.
    """
    # Guard clauses: nothing to compare without both a sentence and audio.
    if not expected_text or not expected_text.strip():
        return "Please enter a sentence.", "0.00"
    if file_info is None:
        return "No audio provided. Please record or upload audio.", "0.00"

    print(f"Expected text: {expected_text}")
    user_spoken_text = transcribe_audio(file_info)
    print(f"User spoken text: {user_spoken_text}")

    # transcribe_audio reports failures as message strings. Scoring such a
    # message as if the user had said it would produce a misleading
    # "poor pronunciation" verdict, so surface the failure instead.
    failure_prefixes = ("Could not understand audio", "Could not request results;")
    if user_spoken_text.startswith(failure_prefixes):
        print(f"Transcription failed: {user_spoken_text}")
        return user_spoken_text, "0.00"

    # Surrounding whitespace and letter case are not pronunciation errors.
    similarity = ratio(expected_text.strip().lower(), user_spoken_text.strip().lower())
    description = f"{similarity:.2f}"

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    print(f"Similarity: {similarity}, Feedback: {feedback}")
    return feedback, description

def validate_sentence(sentence):
    """Return *sentence* unchanged, or a prompt if it is blank.

    A sentence counts as blank when nothing remains after stripping
    surrounding whitespace. Non-blank input is returned exactly as given,
    including any leading or trailing whitespace.
    """
    is_blank = sentence.strip() == ""
    return "Please enter a sentence." if is_blank else sentence

# Build the UI: a sentence box, a read-only validated copy of it, an audio
# input, and a button that scores the recording against the validated text.
with gr.Blocks() as app:
    with gr.Row():
        sentence_input = gr.Textbox(label="Enter Your Sentence Here")
    # Non-editable box that receives the output of validate_sentence.
    validated_sentence = gr.Textbox(label="Valid Sentence", interactive=False)
    # transcribe_audio indexes this value as file_info[1] for the samples;
    # presumably type="numpy" delivers a (sample_rate, data) pair - confirm
    # against the Gradio Audio documentation.
    audio_input = gr.Audio(label="Upload or Record Audio File", type="numpy")
    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")

    # Re-run validation whenever the typed sentence changes and show the
    # result in the read-only box.
    sentence_input.change(
        validate_sentence,
        inputs=sentence_input,
        outputs=validated_sentence
    )

    # The scorer reads the validated box, not the raw input.
    # NOTE(review): for blank input validate_sentence returns its prompt
    # message, so that message is what gets passed as the expected text.
    check_pronunciation_button.click(
        pronunciation_correction,
        inputs=[validated_sentence, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score]
    )

# Runs at import time: starts the app with the debug flag enabled.
app.launch(debug=True)