Spaces:
Sleeping
Sleeping
File size: 3,350 Bytes
5995a5d fb93a17 d6fa022 fb93a17 d6fa022 fb93a17 d6fa022 fb93a17 18faa93 d6fa022 18faa93 fb93a17 d6fa022 18faa93 d6fa022 4462320 c81d3a2 18faa93 d6fa022 18faa93 d6fa022 fb93a17 d6fa022 fb93a17 d6fa022 c994feb d6fa022 2a0284a d6fa022 2a0284a d6fa022 5995a5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import difflib
import os
import subprocess
import sys
import tempfile
import time

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
# Function to play the text (optional)
def play_text(text):
    """Synthesize *text* as Hindi speech and open it in the default audio player.

    The speech is generated with gTTS (language ``'hi'``) and written to a
    temporary ``.mp3`` file. The file is deliberately left on disk
    (``delete=False``) because the external player reads it after this
    function has returned.

    Args:
        text: The text to read aloud.

    Returns:
        A human-readable status string: a success message when the player was
        launched, or a warning when there was no text or no player could be
        started.
    """
    # Nothing to synthesize; report it instead of handing empty text to gTTS.
    if not text or not text.strip():
        return "⚠️ Please enter some text to read out."

    tts = gTTS(text=text, lang='hi', slow=False)

    # Only the unique file name is needed. Closing our own handle first avoids
    # leaking it and avoids holding the file open while gTTS writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)

    try:
        if hasattr(os, "startfile"):
            # Windows: no shell involved, so paths with spaces work.
            os.startfile(audio_path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            # Argument list (no shell) so the path is never re-parsed.
            subprocess.run([opener, audio_path], check=False)
    except OSError as exc:
        # Playback is best-effort: e.g. a headless server has no opener.
        return f"⚠️ Could not open an audio player: {exc}"

    return "✅ Text is being read out. Please listen and read it yourself."
# Function to transcribe user's audio and compare with the original text
def _split_audio(audio_data, chunk_seconds):
    """Split *audio_data* into consecutive chunks of at most *chunk_seconds* seconds."""
    if chunk_seconds <= 0:
        raise ValueError("chunk_seconds must be positive")
    frames_per_chunk = max(1, int(chunk_seconds * audio_data.sample_rate))
    # Slice on whole-sample boundaries so no sample is cut in half.
    bytes_per_chunk = frames_per_chunk * audio_data.sample_width
    raw = audio_data.frame_data
    return [
        sr.AudioData(
            raw[start:start + bytes_per_chunk],
            audio_data.sample_rate,
            audio_data.sample_width,
        )
        for start in range(0, len(raw), bytes_per_chunk)
    ]


def _unmatched_words(matcher, original_words):
    """Return the words of *original_words* that *matcher* could not align."""
    missed = []
    for tag, start, end, _j1, _j2 in matcher.get_opcodes():
        # 'replace' and 'delete' spans are original words that were misread
        # or skipped; 'insert' spans contain only extra transcribed words.
        if tag in ("replace", "delete"):
            missed.extend(original_words[start:end])
    return missed


def transcribe_audio(audio, original_text, *, chunk_seconds=60):
    """Transcribe a Hindi recording and score it against the reference text.

    The recording is split into chunks of ``chunk_seconds`` seconds, each chunk
    is sent to Google Speech Recognition (``hi-IN``), and the joined
    transcription is compared word-by-word with ``original_text``.

    Args:
        audio: Path to the recorded/uploaded audio file, or ``None``/empty when
            nothing was submitted.
        original_text: The reference text the user was asked to read.
        chunk_seconds: Maximum length, in seconds, of each chunk sent to the
            recognizer. Keyword-only; defaults to 60.

    Returns:
        On success, a dict with the transcription, the accuracy percentage
        (``difflib.SequenceMatcher`` ratio over words), the speaking speed in
        words per second of audio, and the reference words that were not
        matched. On failure, ``{"error": <message>}``.
    """
    if not audio:
        return {"error": "No audio provided. Please upload or record your voice first."}

    # Top-level UI boundary: any failure (unreadable file, network error, ...)
    # is reported in the JSON output instead of crashing the event handler.
    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)

        pieces = []
        for audio_chunk in _split_audio(audio_data, chunk_seconds):
            try:
                # Using Google Speech Recognition (supports Hindi)
                pieces.append(
                    recognizer.recognize_google(audio_chunk, language="hi-IN")
                )
            except sr.UnknownValueError:
                # Silent or unintelligible chunk: skip it rather than discard
                # everything that was recognized in the other chunks.
                continue
        transcription = " ".join(pieces)

        # Calculate accuracy
        original_words = (original_text or "").strip().split()
        transcribed_words = transcription.split()
        matcher = difflib.SequenceMatcher(None, original_words, transcribed_words)
        accuracy = round(matcher.ratio() * 100, 2)

        # Speaking speed is based on the length of the recording itself, not
        # on how long the recognition requests took.
        # NOTE(review): assumes frame_data is single-channel PCM, which is also
        # what constructing sr.AudioData from (rate, width) alone implies.
        bytes_per_second = audio_data.sample_rate * audio_data.sample_width
        duration = (
            len(audio_data.frame_data) / bytes_per_second if bytes_per_second else 0
        )
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0.0

        # Use the same alignment as the accuracy score, so one skipped word
        # does not mark every following word as wrong.
        wrong_words = [
            f"🔴 {word}" for word in _unmatched_words(matcher, original_words)
        ]

        return {
            "📝 Transcribed Text": transcription,
            "🎯 Accuracy (%)": accuracy,
            "⏱️ Speaking Speed (words/sec)": speed,
            "❌ Incorrect Words": ' '.join(wrong_words) if wrong_words else "None",
        }
    except Exception as e:
        return {"error": str(e)}
# Gradio App
# Components are rendered in the order they are created inside the Blocks
# context, so the statement order below is also the page layout.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App")

    # Step 1: reference text plus a button to hear it read aloud.
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")
    # play_text returns a status message; show it instead of discarding it so
    # the user gets feedback after pressing the button.
    play_status = gr.Markdown()
    play_button.click(play_text, inputs=[input_text], outputs=[play_status])

    # Step 2: the user's own recording, scored against the reference text.
    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    submit_button = gr.Button("✅ Submit Recording for Checking")
    output = gr.JSON(label="Results")
    submit_button.click(transcribe_audio, inputs=[audio_input, input_text], outputs=[output])

# Launch the app
app.launch()
|