File size: 3,350 Bytes
5995a5d
fb93a17
 
d6fa022
 
 
 
fb93a17
d6fa022
 
fb93a17
d6fa022
 
 
 
fb93a17
18faa93
d6fa022
 
 
 
18faa93
fb93a17
d6fa022
18faa93
 
 
 
 
 
 
 
 
 
 
 
 
d6fa022
 
 
 
 
 
 
 
 
 
4462320
c81d3a2
18faa93
 
 
 
 
 
d6fa022
 
 
18faa93
 
d6fa022
 
fb93a17
d6fa022
 
 
 
 
fb93a17
d6fa022
 
 
 
 
 
c994feb
 
d6fa022
 
2a0284a
d6fa022
2a0284a
d6fa022
 
 
5995a5d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from gtts import gTTS
import time
import difflib
import tempfile
import os
import speech_recognition as sr

# Function to play the text (optional)
def play_text(text):
    """Synthesize *text* as Hindi speech and open it in the system audio player.

    Args:
        text: The Hindi text to read out. ``None`` or blank text is rejected
            with a warning message instead of being sent to gTTS.

    Returns:
        A human-readable status string describing what happened (success,
        missing text, or no player available).
    """
    import subprocess
    import sys

    if not text or not text.strip():
        return "⚠️ Please enter some text to read out."

    tts = gTTS(text=text, lang='hi', slow=False)

    # Only reserve a unique path here and close our handle right away, so the
    # descriptor is not leaked and gTTS / the player can open the file freely
    # (an open handle can lock the file on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)

    # Launch the platform's default opener without going through a shell
    # string; playback stays best-effort, so a missing opener is reported
    # rather than raised.
    try:
        if sys.platform.startswith("win"):
            os.startfile(audio_path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.Popen([opener, audio_path])
    except OSError as exc:
        return f"⚠️ Audio saved to {audio_path}, but no player could be started: {exc}"

    return "✅ Text is being read out. Please listen and read it yourself."

# Function to transcribe user's audio and compare with the original text
def transcribe_audio(audio, original_text):
    """Transcribe a Hindi recording and score it against the reference text.

    Args:
        audio: Path to the recorded/uploaded audio file, or ``None`` when the
            user has not provided a recording.
        original_text: The reference text the user was supposed to read.

    Returns:
        On success, a dict with the transcription, word-level accuracy in
        percent, speaking speed in words per second (based on the length of
        the recording) and the reference words that were missed or misread.
        On any failure, ``{"error": <message>}``.
    """
    if not audio:
        return {"error": "No audio provided. Please upload or record your voice first."}

    original_words = (original_text or "").split()
    if not original_words:
        return {"error": "No reference text provided. Please paste the Hindi text first."}

    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)

        pieces = []
        for chunk in _split_audio(audio_data, chunk_seconds=60):
            try:
                # Using Google Speech Recognition (supports Hindi)
                pieces.append(recognizer.recognize_google(chunk, language="hi-IN"))
            except sr.UnknownValueError:
                # A silent or unintelligible chunk must not discard the rest.
                continue

        if not pieces:
            return {"error": "Could not understand the audio. Please try recording again."}

        transcription = " ".join(pieces)
        transcribed_words = transcription.split()

        accuracy, wrong_words = _score_reading(original_words, transcribed_words)

        # Speaking speed is measured against the recording's own duration,
        # not the time the recognition service took to answer.
        bytes_per_second = audio_data.sample_rate * audio_data.sample_width
        duration = len(audio_data.frame_data) / bytes_per_second if bytes_per_second else 0
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0.0

        return {
            "📝 Transcribed Text": transcription,
            "🎯 Accuracy (%)": accuracy,
            "⏱️ Speaking Speed (words/sec)": speed,
            "❌ Incorrect Words": ' '.join(wrong_words) if wrong_words else "None"
        }
    except Exception as e:
        # UI boundary: surface the failure in the results panel instead of crashing.
        return {"error": str(e)}


def _split_audio(audio_data, chunk_seconds):
    """Yield consecutive ``sr.AudioData`` pieces of at most *chunk_seconds* each."""
    sample_width = audio_data.sample_width
    # frame_data is raw PCM bytes, so the chunk size must be expressed in
    # bytes (rate * width * seconds) and stay aligned to whole samples.
    chunk_bytes = int(audio_data.sample_rate * sample_width * chunk_seconds)
    chunk_bytes -= chunk_bytes % sample_width
    chunk_bytes = max(chunk_bytes, sample_width)

    raw = audio_data.frame_data
    for start in range(0, len(raw), chunk_bytes):
        yield sr.AudioData(raw[start:start + chunk_bytes], audio_data.sample_rate, sample_width)


def _score_reading(original_words, transcribed_words):
    """Return ``(accuracy_percent, flagged_reference_words)`` for a reading."""
    # autojunk=False keeps frequent words from being ignored on long texts.
    matcher = difflib.SequenceMatcher(None, original_words, transcribed_words, autojunk=False)
    accuracy = round(matcher.ratio() * 100, 2)

    # Use the alignment rather than positions, so a single inserted or
    # dropped word does not mark every following word as wrong.
    wrong_words = []
    for tag, start, end, _, _ in matcher.get_opcodes():
        if tag in ("replace", "delete"):
            wrong_words.extend(f"🔴 {word}" for word in original_words[start:end])
    return accuracy, wrong_words

# Gradio App
# Build the UI: reference text + listen button, then recording + scoring.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App")
    
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")

    # play_text returns a status string; route it to a visible component so
    # the user sees it instead of the value being dropped by an empty outputs list.
    play_status = gr.Textbox(label="Status", interactive=False)
    play_button.click(play_text, inputs=[input_text], outputs=[play_status])

    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    
    submit_button = gr.Button("✅ Submit Recording for Checking")
    
    # transcribe_audio returns a dict, rendered as JSON in the results panel.
    output = gr.JSON(label="Results")
    
    submit_button.click(transcribe_audio, inputs=[audio_input, input_text], outputs=[output])

# Launch the app
app.launch()