mr2along committed on
Commit
33ead9a
1 Parent(s): 257e787

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -28
app.py CHANGED
@@ -7,30 +7,11 @@ from gtts import gTTS
7
  import io
8
  from pydub import AudioSegment
9
  import time
 
10
 
11
  # Create audio directory if it doesn't exist
12
  if not os.path.exists('audio'):
13
  os.makedirs('audio')
14
- # Step 2: Create pronunciation audio for incorrect words
15
- def upfilepath(local_filename):
16
- ts = time.time()
17
- upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
18
- files = {'files': open(local_filename, 'rb')}
19
-
20
- try:
21
- response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
22
-
23
- if response.status_code == 200:
24
- result = response.json()
25
- extracted_path = result[0]
26
- return extracted_path
27
- else:
28
- return None
29
-
30
- except requests.exceptions.Timeout:
31
- return "Request timed out. Please try again."
32
- except Exception as e:
33
- return f"An error occurred: {e}"
34
 
35
  # Step 1: Transcribe the audio file
36
  def transcribe_audio(audio):
@@ -59,7 +40,7 @@ def transcribe_audio(audio):
59
  audio_data = recognizer.record(source)
60
 
61
  try:
62
- transcription = recognizer.recognize_google(audio_data)
63
  return transcription
64
  except sr.UnknownValueError:
65
  return "Google Speech Recognition could not understand the audio"
@@ -76,6 +57,27 @@ def create_pronunciation_audio(word):
76
  except Exception as e:
77
  return f"Failed to create pronunciation audio: {e}"
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Step 3: Compare the transcribed text with the input paragraph
80
  def compare_texts(reference_text, transcribed_text):
81
  reference_words = reference_text.split()
@@ -123,12 +125,15 @@ def compare_texts(reference_text, transcribed_text):
123
  for word, audio in incorrect_words_audios:
124
  suggestion = difflib.get_close_matches(word, reference_words, n=1)
125
  suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
126
- up_audio=upfilepath(audio)
127
- audio_src=f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
128
  html_output += f'{word}: '
129
  html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
130
 
131
- #return [html_output, [audio for _, audio in incorrect_words_audios]]
 
 
 
132
  return [html_output]
133
 
134
  # Step 4: Text-to-Speech Function
@@ -150,7 +155,7 @@ def gradio_function(paragraph, audio):
150
 
151
  # Return comparison result
152
  return comparison_result
153
-
154
  # Gradio Interface using the updated API
155
  interface = gr.Interface(
156
  fn=gradio_function,
@@ -158,10 +163,9 @@ interface = gr.Interface(
158
  gr.Textbox(lines=5, label="Input Paragraph"),
159
  gr.Audio(type="filepath", label="Record Audio")
160
  ],
161
- #outputs=["html","files"],
162
  outputs=["html"],
163
- title="Speech Recognition Comparison",
164
- description="Input a paragraph, record your audio, and compare the transcription to the original text."
165
  )
166
 
167
  # Gradio Interface for Text-to-Speech
 
7
  import io
8
  from pydub import AudioSegment
9
  import time
10
+ from underthesea import phonetic
11
 
12
  # Create audio directory if it doesn't exist
13
  if not os.path.exists('audio'):
14
  os.makedirs('audio')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Step 1: Transcribe the audio file
17
  def transcribe_audio(audio):
 
40
  audio_data = recognizer.record(source)
41
 
42
  try:
43
+ transcription = recognizer.recognize_google(audio_data, language='vi-VN') # For Vietnamese
44
  return transcription
45
  except sr.UnknownValueError:
46
  return "Google Speech Recognition could not understand the audio"
 
57
  except Exception as e:
58
  return f"Failed to create pronunciation audio: {e}"
59
 
60
+ # Upload function to Hugging Face Space
61
+ def upfilepath(local_filename):
62
+ ts = time.time()
63
+ upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
64
+ files = {'files': open(local_filename, 'rb')}
65
+
66
+ try:
67
+ response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
68
+
69
+ if response.status_code == 200:
70
+ result = response.json()
71
+ extracted_path = result[0]
72
+ return extracted_path
73
+ else:
74
+ return None
75
+
76
+ except requests.exceptions.Timeout:
77
+ return "Request timed out. Please try again."
78
+ except Exception as e:
79
+ return f"An error occurred: {e}"
80
+
81
  # Step 3: Compare the transcribed text with the input paragraph
82
  def compare_texts(reference_text, transcribed_text):
83
  reference_words = reference_text.split()
 
125
  for word, audio in incorrect_words_audios:
126
  suggestion = difflib.get_close_matches(word, reference_words, n=1)
127
  suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
128
+ up_audio = upfilepath(audio)
129
+ audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
130
  html_output += f'{word}: '
131
  html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
132
 
133
+ # Step 4: Vietnamese Phonetic Transcription
134
+ phonetic_transcription = phonetic(reference_text)
135
+ html_output += f"<br><strong>Phonetic Transcription (Vietnamese):</strong> {phonetic_transcription}<br>"
136
+
137
  return [html_output]
138
 
139
  # Step 4: Text-to-Speech Function
 
155
 
156
  # Return comparison result
157
  return comparison_result
158
+
159
  # Gradio Interface using the updated API
160
  interface = gr.Interface(
161
  fn=gradio_function,
 
163
  gr.Textbox(lines=5, label="Input Paragraph"),
164
  gr.Audio(type="filepath", label="Record Audio")
165
  ],
 
166
  outputs=["html"],
167
+ title="Speech Recognition Comparison with Phonetic Transcription",
168
+ description="Input a paragraph, record your audio, and compare the transcription to the original text. Also, see phonetic transcription for Vietnamese."
169
  )
170
 
171
  # Gradio Interface for Text-to-Speech