bomolopuu committed on
Commit 5ccec7c · 1 Parent(s): 7dd2fca

claude version

Files changed (1)
  1. app.py +22 -21
app.py CHANGED
@@ -7,15 +7,18 @@ from lid import identify, LID_EXAMPLES
 def transcribe_multiple_files(audio_files, lang, transcription):
     transcriptions = []
     for audio_file in audio_files:
-        audio, _ = librosa.load(audio_file.name)
-        transcription = transcribe(model, audio, lang, transcription)
-        transcriptions.append(transcription)
-    return transcriptions
+        try:
+            audio, sr = librosa.load(audio_file.name)
+            result = transcribe(model, audio, lang, transcription)
+            transcriptions.append(f"File: {audio_file.name}\nTranscription: {result}\n")
+        except Exception as e:
+            transcriptions.append(f"Error processing {audio_file.name}: {str(e)}\n")
+    return "\n".join(transcriptions)
 
 mms_transcribe = gr.Interface(
-    fn=lambda audio_files, lang, transcription: [transcribe(model, audio, lang, transcription) for audio in audio_files],
+    fn=transcribe_multiple_files,
     inputs=[
-        gr.File(label="Audio Files", file_count="multiple"),  # Allow multiple audio files
+        gr.File(label="Audio Files", file_count="multiple"),
         gr.Dropdown(
             [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
             label="Language",
@@ -23,21 +26,19 @@ mms_transcribe = gr.Interface(
         ),
         gr.Textbox(label="Optional: Provide your own transcription"),
     ],
-    outputs="text",  # Return a list of transcriptions
+    outputs=gr.Textbox(label="Transcriptions", lines=10),
     title="Speech-to-text",
-    description=("Transcribe multiple audio files from a microphone or input files in your desired language."),
+    description="Transcribe multiple audio files in your desired language.",
    allow_flagging="never",
 )
 
 mms_identify = gr.Interface(
     fn=identify,
-    inputs=[
-        gr.Audio(),
-    ],
+    inputs=[gr.Audio()],
     outputs=gr.Label(num_top_classes=10),
     examples=LID_EXAMPLES,
     title="Language Identification",
-    description=("Identity the language of input audio."),
+    description="Identify the language of input audio.",
     allow_flagging="never",
 )
 
@@ -51,24 +52,24 @@ with gr.Blocks() as demo:
         "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
     )
     gr.HTML(
-        """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
+        """<center>Click on the appropriate tab to explore Speech-to-text (ASR) and Language identification (LID) demos.</center>"""
     )
     gr.HTML(
-        """<center>You can also finetune MMS models on your data using the recipes provides here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a> </center>"""
+        """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a></center>"""
     )
     gr.HTML(
-        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
+        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
     )
 
     tabbed_interface.render()
     gr.HTML(
         """
-        <div class="footer" style="text-align:center">
-            <p>
-                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
-            </p>
-        </div>
-        """
+        <div class="footer" style="text-align:center">
+            <p>
+                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
+            </p>
+        </div>
+        """
     )
 
 if __name__ == "__main__":
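
For context, a minimal standalone sketch of how the committed transcribe_multiple_files behaves. The real transcribe function, model object, and librosa.load call live elsewhere in the Space and are not part of this diff, so the stand-ins below (transcribe, load_audio, the SimpleNamespace file wrappers) are assumptions for illustration only, not the Space's actual code.

# Sketch only: stand-ins for the ASR pieces defined outside this diff.
from types import SimpleNamespace

model = None  # placeholder; the real MMS model is loaded elsewhere in app.py

def transcribe(model, audio, lang, transcription):
    # stand-in for the real ASR call used by the Space
    return f"<{len(audio)} samples, lang={lang}>"

def load_audio(path):
    # stand-in for librosa.load(path); raises for a missing file
    if not path.endswith("clip.wav"):
        raise FileNotFoundError(path)
    return [0.0] * 16000, 16000

def transcribe_multiple_files(audio_files, lang, transcription):
    # same control flow as the committed version: one try/except per file,
    # results joined into a single string for the gr.Textbox output
    transcriptions = []
    for audio_file in audio_files:
        try:
            audio, sr = load_audio(audio_file.name)
            result = transcribe(model, audio, lang, transcription)
            transcriptions.append(f"File: {audio_file.name}\nTranscription: {result}\n")
        except Exception as e:
            transcriptions.append(f"Error processing {audio_file.name}: {str(e)}\n")
    return "\n".join(transcriptions)

files = [SimpleNamespace(name="clip.wav"), SimpleNamespace(name="missing.wav")]
print(transcribe_multiple_files(files, "eng (English)", ""))

A failure on one file is reported inline rather than aborting the whole batch, and the newline-joined return value matches the switch from outputs="text" to a multi-line gr.Textbox in this commit.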