mr2along committed on
Commit
3e9568e
1 Parent(s): 999ce1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -3,14 +3,28 @@ import difflib
3
  import gradio as gr
4
  from gtts import gTTS
5
  import io
 
 
6
 
7
  # Step 1: Transcribe the audio file
8
  def transcribe_audio(audio):
9
  recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Convert audio into recognizable format for the Recognizer
12
  audio_file = sr.AudioFile(audio)
13
-
14
  with audio_file as source:
15
  audio_data = recognizer.record(source)
16
 
@@ -25,7 +39,7 @@ def transcribe_audio(audio):
25
 
26
  # Step 2: Create pronunciation audio for incorrect words
27
  def create_pronunciation_audio(word):
28
- tts = gTTS(word, lang='en') # Specify the language for TTS
29
  audio_buffer = io.BytesIO()
30
  tts.save(audio_buffer)
31
  audio_buffer.seek(0)
@@ -49,7 +63,7 @@ def compare_texts(reference_text, transcribed_text):
49
 
50
  # Generate colored word score list
51
  for i, word in enumerate(reference_words):
52
- if i < len(transcribed_words):
53
  if word.lower() == transcribed_words[i].lower():
54
  html_output += f'<span style="color: green;">{word}</span> ' # Correct words in green
55
  elif difflib.get_close_matches(word, transcribed_words):
@@ -62,8 +76,7 @@ def compare_texts(reference_text, transcribed_text):
62
  # Encode the audio as base64 for playback
63
  audio_base64 = audio_buffer.getvalue().hex()
64
  incorrect_words_audios.append((word, audio_base64))
65
- else:
66
- # If reference word has no corresponding transcribed word
67
  html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
68
 
69
  # Provide audio for incorrect words
@@ -78,7 +91,7 @@ def compare_texts(reference_text, transcribed_text):
78
 
79
  # Step 4: Text-to-Speech Function
80
  def text_to_speech(paragraph):
81
- tts = gTTS(paragraph, lang='en') # Specify the language for TTS
82
  audio_buffer = io.BytesIO()
83
  tts.save(audio_buffer)
84
  audio_buffer.seek(0)
 
3
  import gradio as gr
4
  from gtts import gTTS
5
  import io
6
+ import os
7
+ from pydub import AudioSegment
8
 
9
  # Step 1: Transcribe the audio file
10
  def transcribe_audio(audio):
11
  recognizer = sr.Recognizer()
12
+ audio_format = audio.split('.')[-1].lower()
13
+
14
+ # Convert to WAV if the audio is not in a supported format
15
+ if audio_format != 'wav':
16
+ try:
17
+ # Load the audio file with pydub
18
+ audio_segment = AudioSegment.from_file(audio)
19
+ wav_path = audio.replace(audio_format, 'wav')
20
+ audio_segment.export(wav_path, format='wav') # Convert to WAV
21
+ audio = wav_path # Update audio path to the converted file
22
+ except Exception as e:
23
+ return f"Error converting audio: {e}"
24
 
25
  # Convert audio into recognizable format for the Recognizer
26
  audio_file = sr.AudioFile(audio)
27
+
28
  with audio_file as source:
29
  audio_data = recognizer.record(source)
30
 
 
39
 
40
  # Step 2: Create pronunciation audio for incorrect words
41
  def create_pronunciation_audio(word):
42
+ tts = gTTS(word)
43
  audio_buffer = io.BytesIO()
44
  tts.save(audio_buffer)
45
  audio_buffer.seek(0)
 
63
 
64
  # Generate colored word score list
65
  for i, word in enumerate(reference_words):
66
+ try:
67
  if word.lower() == transcribed_words[i].lower():
68
  html_output += f'<span style="color: green;">{word}</span> ' # Correct words in green
69
  elif difflib.get_close_matches(word, transcribed_words):
 
76
  # Encode the audio as base64 for playback
77
  audio_base64 = audio_buffer.getvalue().hex()
78
  incorrect_words_audios.append((word, audio_base64))
79
+ except IndexError:
 
80
  html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
81
 
82
  # Provide audio for incorrect words
 
91
 
92
  # Step 4: Text-to-Speech Function
93
  def text_to_speech(paragraph):
94
+ tts = gTTS(paragraph)
95
  audio_buffer = io.BytesIO()
96
  tts.save(audio_buffer)
97
  audio_buffer.seek(0)