huckiyang committed on
Commit
00839fe
·
1 Parent(s): 0304b8c

[zh] add options for zh

Browse files
Files changed (2) hide show
  1. app.py +87 -12
  2. requirements.txt +1 -0
app.py CHANGED
@@ -26,6 +26,17 @@ try:
26
  subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "espnet_model_zoo"])
27
  import espnet_model_zoo
28
  print("espnet_model_zoo installed successfully.")
 
 
 
 
 
 
 
 
 
 
 
29
  except ModuleNotFoundError as e:
30
  missing_module = str(e).split("'")[1]
31
  print(f"Installing missing module: {missing_module}")
@@ -48,6 +59,15 @@ except ModuleNotFoundError as e:
48
  print("Installing espnet_model_zoo. This may take a few minutes...")
49
  subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "espnet_model_zoo"])
50
  import espnet_model_zoo
 
 
 
 
 
 
 
 
 
51
  print("All required packages installed successfully.")
52
  except ModuleNotFoundError as e:
53
  print(f"Failed to install {str(e).split('No module named ')[1]}. Please install manually.")
@@ -121,8 +141,29 @@ def transcribe_english(audio_file):
121
  return transcribe_audio(audio_file, "<eng>")
122
 
123
  # Function to handle Chinese transcription
124
- def transcribe_chinese(audio_file):
125
- return transcribe_audio(audio_file, "<zho>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Function to handle Japanese transcription
128
  def transcribe_japanese(audio_file):
@@ -186,6 +227,24 @@ with demo:
186
  value="English"
187
  )
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  with gr.Row():
190
  with gr.Column():
191
  mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
@@ -200,7 +259,7 @@ with demo:
200
  mic_feedback_btn = gr.Button("Submit Feedback")
201
  mic_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
202
 
203
- def transcribe_mic(audio, lang):
204
  lang_map = {
205
  "English": "<eng>",
206
  "Mandarin": "<zho>",
@@ -210,18 +269,23 @@ with demo:
210
  "Italian": "<ita>",
211
  "German": "<deu>"
212
  }
 
 
 
 
 
213
  return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
214
 
215
- mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic], outputs=mic_output)
216
 
217
  # Add feedback submission function
218
- def submit_mic_feedback(transcription, rating, language):
219
  lang_name = language # Already a string like "English"
220
- return save_feedback(transcription, rating, lang_name)
221
 
222
  mic_feedback_btn.click(
223
  fn=submit_mic_feedback,
224
- inputs=[mic_output, mic_rating, language_mic],
225
  outputs=mic_feedback_msg
226
  )
227
 
@@ -260,6 +324,13 @@ with demo:
260
  )
261
 
262
  with gr.TabItem("Mandarin"):
 
 
 
 
 
 
 
263
  with gr.Row():
264
  with gr.Column():
265
  zh_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
@@ -281,15 +352,19 @@ with demo:
281
  inputs=zh_input
282
  )
283
 
284
- zh_button.click(fn=transcribe_chinese, inputs=zh_input, outputs=zh_output)
 
 
285
 
286
- # Add feedback submission
287
- def submit_zh_feedback(transcription, rating, audio_path):
288
- return save_feedback(transcription, rating, "Mandarin", audio_path)
 
 
289
 
290
  zh_feedback_btn.click(
291
  fn=submit_zh_feedback,
292
- inputs=[zh_output, zh_rating, zh_input],
293
  outputs=zh_feedback_msg
294
  )
295
 
 
26
  subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "espnet_model_zoo"])
27
  import espnet_model_zoo
28
  print("espnet_model_zoo installed successfully.")
29
+
30
+ # Check for opencc-python-reimplemented
31
+ try:
32
+ from opencc import OpenCC
33
+ print("OpenCC already installed.")
34
+ except ModuleNotFoundError:
35
+ print("Installing opencc-python-reimplemented. This may take a moment...")
36
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "opencc-python-reimplemented"])
37
+ from opencc import OpenCC
38
+ print("OpenCC installed successfully.")
39
+
40
  except ModuleNotFoundError as e:
41
  missing_module = str(e).split("'")[1]
42
  print(f"Installing missing module: {missing_module}")
 
59
  print("Installing espnet_model_zoo. This may take a few minutes...")
60
  subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "espnet_model_zoo"])
61
  import espnet_model_zoo
62
+
63
+ # Also check for OpenCC
64
+ try:
65
+ from opencc import OpenCC
66
+ except ModuleNotFoundError:
67
+ print("Installing opencc-python-reimplemented. This may take a moment...")
68
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "opencc-python-reimplemented"])
69
+ from opencc import OpenCC
70
+
71
  print("All required packages installed successfully.")
72
  except ModuleNotFoundError as e:
73
  print(f"Failed to install {str(e).split('No module named ')[1]}. Please install manually.")
 
141
  return transcribe_audio(audio_file, "<eng>")
142
 
143
  # Function to handle Chinese transcription
144
def transcribe_chinese(audio_file, chinese_variant="Traditional"):
    """
    Transcribe Mandarin audio and return the text in the requested script.

    Args:
        audio_file: Path to the audio file.
        chinese_variant: Either "Simplified" or "Traditional". The name is
            matched case-insensitively because the UI callbacks in this file
            pass the radio value through ``.lower()``; with an exact-case
            comparison neither branch would ever run for those callers.

    Returns:
        The result of ``transcribe_audio(audio_file, "<zho>")``, converted
        with OpenCC when a known variant is requested. An empty or non-string
        result is returned unchanged, as is any unrecognised variant.
    """
    # First get the base transcription
    asr_text = transcribe_audio(audio_file, "<zho>")

    # Nothing to convert; also avoids calling str methods on a non-string.
    # NOTE(review): assumes transcribe_audio normally returns a str - confirm.
    if not asr_text or not isinstance(asr_text, str):
        return asr_text

    variant = str(chinese_variant or "").strip().lower()

    if variant == "traditional":
        # s2twp: Simplified to Traditional (Taiwan standard with phrases)
        return OpenCC('s2twp').convert(asr_text)

    if variant == "simplified" and not asr_text.isascii():
        # Non-ASCII text might be traditional, so convert it to be safe.
        # tw2sp: Traditional (Taiwan standard) to Simplified with phrases
        return OpenCC('tw2sp').convert(asr_text)

    return asr_text
167
 
168
  # Function to handle Japanese transcription
169
  def transcribe_japanese(audio_file):
 
227
  value="English"
228
  )
229
 
230
+ # Add Chinese variant selection that appears only when Mandarin is selected
231
+ chinese_variant_mic = gr.Radio(
232
+ ["Simplified", "Traditional"],
233
+ label="Chinese Character Variant",
234
+ value="Traditional",
235
+ visible=False
236
+ )
237
+
238
+ # Make Chinese variant selection visible only when Mandarin is selected
239
def update_chinese_variant_visibility(lang):
    """Return a Gradio update that is visible only when ``lang`` is "Mandarin"."""
    show_selector = lang == "Mandarin"
    return gr.update(visible=show_selector)
241
+
242
+ language_mic.change(
243
+ fn=update_chinese_variant_visibility,
244
+ inputs=language_mic,
245
+ outputs=chinese_variant_mic
246
+ )
247
+
248
  with gr.Row():
249
  with gr.Column():
250
  mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
 
259
  mic_feedback_btn = gr.Button("Submit Feedback")
260
  mic_feedback_msg = gr.Textbox(label="Feedback Status", visible=True)
261
 
262
+ def transcribe_mic(audio, lang, chinese_variant=None):
263
  lang_map = {
264
  "English": "<eng>",
265
  "Mandarin": "<zho>",
 
269
  "Italian": "<ita>",
270
  "German": "<deu>"
271
  }
272
+
273
+ # Special handling for Chinese with variant selection
274
+ if lang == "Mandarin" and chinese_variant:
275
+ return transcribe_chinese(audio, chinese_variant.lower())
276
+
277
  return transcribe_audio(audio, lang_map.get(lang, "<eng>"))
278
 
279
+ mic_button.click(fn=transcribe_mic, inputs=[mic_input, language_mic, chinese_variant_mic], outputs=mic_output)
280
 
281
  # Add feedback submission function
282
def submit_mic_feedback(transcription, rating, language, chinese_variant):
    """
    Forward microphone-tab feedback to ``save_feedback``.

    Args:
        transcription: Text shown in the microphone output box.
        rating: Rating chosen by the user.
        language: Language name as a string, e.g. "English".
        chinese_variant: Value of the Chinese variant radio. That radio keeps
            its value even while hidden, so it is only meaningful when
            ``language`` is "Mandarin".

    Returns:
        Whatever ``save_feedback`` returns (shown as the feedback status).
    """
    lang_name = language  # Already a string like "English"

    # Only tag the variant for Mandarin; otherwise the hidden radio's value
    # would produce misleading labels such as "English (Traditional)".
    if language == "Mandarin" and chinese_variant:
        lang_name = f"{language} ({chinese_variant})"

    return save_feedback(transcription, rating, lang_name, None)
285
 
286
  mic_feedback_btn.click(
287
  fn=submit_mic_feedback,
288
+ inputs=[mic_output, mic_rating, language_mic, chinese_variant_mic],
289
  outputs=mic_feedback_msg
290
  )
291
 
 
324
  )
325
 
326
  with gr.TabItem("Mandarin"):
327
+ # Add Chinese variant selection
328
+ chinese_variant = gr.Radio(
329
+ ["Simplified", "Traditional"],
330
+ label="Chinese Character Variant",
331
+ value="Simplified"
332
+ )
333
+
334
  with gr.Row():
335
  with gr.Column():
336
  zh_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")
 
352
  inputs=zh_input
353
  )
354
 
355
+ # Update the click function to include the Chinese variant
356
def transcribe_chinese_with_variant(audio_file, variant):
    """
    Transcribe uploaded Mandarin audio using the variant chosen in the radio.

    Args:
        audio_file: Path to the uploaded audio file.
        variant: Radio value, "Simplified" or "Traditional".

    Returns:
        The transcription returned by ``transcribe_chinese``.
    """
    # Pass the radio value through unchanged: transcribe_chinese documents and
    # compares against the capitalised names, so lower-casing it here would
    # stop either conversion branch from matching (and would raise on None).
    return transcribe_chinese(audio_file, variant)
358
 
359
+ zh_button.click(fn=transcribe_chinese_with_variant, inputs=[zh_input, chinese_variant], outputs=zh_output)
360
+
361
+ # Update feedback submission to include variant
362
def submit_zh_feedback(transcription, rating, audio_path, variant):
    """Send Mandarin-tab feedback to ``save_feedback``, tagging the language with the variant."""
    language_label = "Mandarin ({})".format(variant)
    return save_feedback(transcription, rating, language_label, audio_path)
364
 
365
  zh_feedback_btn.click(
366
  fn=submit_zh_feedback,
367
+ inputs=[zh_output, zh_rating, zh_input, chinese_variant],
368
  outputs=zh_feedback_msg
369
  )
370
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  gradio
2
  espnet_model_zoo
3
  espnet
 
1
+ opencc-python-reimplemented
2
  gradio
3
  espnet_model_zoo
4
  espnet