File size: 2,547 Bytes
b63476f
fb93a17
 
d6fa022
 
 
 
fb93a17
d6fa022
 
fb93a17
d6fa022
 
 
 
fb93a17
bf2d620
d6fa022
 
 
 
fb93a17
d6fa022
bf2d620
 
d6fa022
 
 
 
 
 
 
 
 
 
bf2d620
c81d3a2
bf2d620
18faa93
d6fa022
 
 
bf2d620
d6fa022
 
fb93a17
d6fa022
 
 
 
 
fb93a17
d6fa022
 
 
 
 
 
c994feb
 
d6fa022
 
2a0284a
d6fa022
2a0284a
d6fa022
 
 
5995a5d
bf2d620
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
from gtts import gTTS
import time
import difflib
import tempfile
import os
import speech_recognition as sr

# Function to play the text (optional)
def play_text(text):
    """Synthesize *text* as Hindi speech and open it in the default player.

    The speech is generated with gTTS, written to a temporary ``.mp3`` file
    and handed to the operating system's default handler for that file type.
    Playback happens on the machine running this process, not in the
    browser of a remote user.

    Args:
        text: The Hindi text to read aloud.

    Returns:
        A short human-readable status message.
    """
    import subprocess
    import sys

    # gTTS rejects empty input; answer with a message instead of failing.
    if not text or not text.strip():
        return "⚠️ Please enter some Hindi text first."

    tts = gTTS(text=text, lang='hi', slow=False)

    # Reserve a unique path and close the handle immediately so it is not
    # leaked; gTTS reopens the file by name when saving.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)

    # Open the file without going through a shell so that paths containing
    # spaces or shell metacharacters are handled correctly.
    try:
        if sys.platform.startswith("win"):
            os.startfile(audio_path)
        elif sys.platform == "darwin":
            subprocess.Popen(["open", audio_path])
        else:
            subprocess.Popen(["xdg-open", audio_path])
    except OSError as exc:
        # Best effort: the audio exists even if no player could be started.
        return f"⚠️ Audio saved to {audio_path}, but it could not be played: {exc}"

    return "✅ Text is being read out. Please listen and read it yourself."

# Helper: score a transcription against the reference text
def _reading_metrics(original_text, transcription, duration_seconds):
    """Return ``(accuracy_percent, words_per_second)`` for a reading attempt.

    Accuracy is the word-level ``difflib.SequenceMatcher`` ratio between the
    reference text and the transcription, scaled to 0-100. Speed is the
    number of transcribed words divided by ``duration_seconds``; it is
    reported as ``0.0`` when the duration is not positive.
    """
    original_words = original_text.strip().split()
    transcribed_words = transcription.strip().split()
    matcher = difflib.SequenceMatcher(None, original_words, transcribed_words)
    accuracy = round(matcher.ratio() * 100, 2)

    if duration_seconds > 0:
        speed = round(len(transcribed_words) / duration_seconds, 2)
    else:
        speed = 0.0
    return accuracy, speed


# Function to transcribe user's audio
def transcribe_audio(audio, original_text):
    """Transcribe a recording and compare it with the text that was read.

    Args:
        audio: Path to the recorded or uploaded audio file, or ``None`` when
            nothing was provided.
        original_text: The reference text the user was asked to read.

    Returns:
        A dict with the transcription, the accuracy in percent and the
        speaking speed in words per second, or ``{"error": message}`` when
        the input is missing or recognition fails.
    """
    if not audio:
        return {"error": "No audio provided. Please upload or record your voice first."}
    if not original_text or not original_text.strip():
        return {"error": "No reference text provided. Please enter the text you are reading."}

    recognizer = sr.Recognizer()
    try:
        # Reading the file is inside the try block so that an unreadable or
        # unsupported file is reported the same way as a recognition failure.
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            # Length of the recording in seconds. This is the whole clip,
            # including any silence, not the recognition request latency.
            duration = source.DURATION or 0.0
        # Using Google Speech Recognition (supports Hindi)
        transcription = recognizer.recognize_google(audio_data, language="hi-IN")
    except sr.UnknownValueError:
        # This exception carries no message, so str(e) would be empty.
        return {"error": "Could not understand the audio. Please speak clearly and try again."}
    except Exception as e:
        return {"error": str(e)}

    accuracy, speed = _reading_metrics(original_text, transcription, duration)
    return {
        "📝 Transcribed Text": transcription,
        "🎯 Accuracy (%)": accuracy,
        "⏱️ Speaking Speed (words/sec)": speed,
    }

# Gradio App
# Layout: a text box with a "listen" button, an audio input for the user's
# own reading, and a JSON panel showing the transcription results.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App")
    
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")

    # play_text returns a status message; route it to a visible component
    # instead of an empty output list, which would discard it.
    play_status = gr.Markdown()
    play_button.click(play_text, inputs=[input_text], outputs=[play_status])

    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    # type="filepath" hands transcribe_audio a path on disk rather than raw samples.
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    
    submit_button = gr.Button("✅ Submit Recording for Checking")
    
    output = gr.JSON(label="Results")
    
    submit_button.click(transcribe_audio, inputs=[audio_input, input_text], outputs=[output])

# Launch the app
app.launch()