bomolopuu committed on
Commit 5ccec7c · 1 Parent(s): 7dd2fca

claude version

Files changed (1)
  1. app.py +22 -21
app.py CHANGED
@@ -7,15 +7,18 @@ from lid import identify, LID_EXAMPLES
 def transcribe_multiple_files(audio_files, lang, transcription):
     transcriptions = []
     for audio_file in audio_files:
-        audio, _ = librosa.load(audio_file.name)
-        transcription = transcribe(model, audio, lang, transcription)
-        transcriptions.append(transcription)
-    return transcriptions
+        try:
+            audio, sr = librosa.load(audio_file.name)
+            result = transcribe(model, audio, lang, transcription)
+            transcriptions.append(f"File: {audio_file.name}\nTranscription: {result}\n")
+        except Exception as e:
+            transcriptions.append(f"Error processing {audio_file.name}: {str(e)}\n")
+    return "\n".join(transcriptions)
 
 mms_transcribe = gr.Interface(
-    fn=lambda audio_files, lang, transcription: [transcribe(model, audio, lang, transcription) for audio in audio_files],
+    fn=transcribe_multiple_files,
     inputs=[
-        gr.File(label="Audio Files", file_count="multiple"),  # Allow multiple audio files
+        gr.File(label="Audio Files", file_count="multiple"),
         gr.Dropdown(
             [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
             label="Language",
@@ -23,21 +26,19 @@ mms_transcribe = gr.Interface(
         ),
         gr.Textbox(label="Optional: Provide your own transcription"),
     ],
-    outputs="text",  # Return a list of transcriptions
+    outputs=gr.Textbox(label="Transcriptions", lines=10),
     title="Speech-to-text",
-    description=("Transcribe multiple audio files from a microphone or input files in your desired language."),
+    description="Transcribe multiple audio files in your desired language.",
    allow_flagging="never",
 )
 
 mms_identify = gr.Interface(
     fn=identify,
-    inputs=[
-        gr.Audio(),
-    ],
+    inputs=[gr.Audio()],
     outputs=gr.Label(num_top_classes=10),
     examples=LID_EXAMPLES,
     title="Language Identification",
-    description=("Identity the language of input audio."),
+    description="Identify the language of input audio.",
     allow_flagging="never",
 )
 
@@ -51,24 +52,24 @@ with gr.Blocks() as demo:
         "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
     )
     gr.HTML(
-        """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
+        """<center>Click on the appropriate tab to explore Speech-to-text (ASR) and Language identification (LID) demos.</center>"""
     )
     gr.HTML(
-        """<center>You can also finetune MMS models on your data using the recipes provides here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a> </center>"""
+        """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a></center>"""
     )
     gr.HTML(
-        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
+        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
     )
 
     tabbed_interface.render()
     gr.HTML(
         """
-        <div class="footer" style="text-align:center">
-            <p>
-                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
-            </p>
-        </div>
-        """
+        <div class="footer" style="text-align:center">
+            <p>
+                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
+            </p>
+        </div>
+        """
     )
 
 if __name__ == "__main__":
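
For context, a minimal standalone sketch of how the committed transcribe_multiple_files behaves. The real transcribe function, model object, and librosa.load call live elsewhere in the Space and are not part of this diff, so the stand-ins below (transcribe, load_audio, the SimpleNamespace file wrappers) are assumptions for illustration only, not the Space's actual code.

# Sketch only: stand-ins for the ASR pieces defined outside this diff.
from types import SimpleNamespace

model = None  # placeholder; the real MMS model is loaded elsewhere in app.py

def transcribe(model, audio, lang, transcription):
    # stand-in for the real ASR call used by the Space
    return f"<{len(audio)} samples, lang={lang}>"

def load_audio(path):
    # stand-in for librosa.load(path); raises for a missing file
    if not path.endswith("clip.wav"):
        raise FileNotFoundError(path)
    return [0.0] * 16000, 16000

def transcribe_multiple_files(audio_files, lang, transcription):
    # same control flow as the committed version: one try/except per file,
    # results joined into a single string for the gr.Textbox output
    transcriptions = []
    for audio_file in audio_files:
        try:
            audio, sr = load_audio(audio_file.name)
            result = transcribe(model, audio, lang, transcription)
            transcriptions.append(f"File: {audio_file.name}\nTranscription: {result}\n")
        except Exception as e:
            transcriptions.append(f"Error processing {audio_file.name}: {str(e)}\n")
    return "\n".join(transcriptions)

files = [SimpleNamespace(name="clip.wav"), SimpleNamespace(name="missing.wav")]
print(transcribe_multiple_files(files, "eng (English)", ""))

A failure on one file is reported inline rather than aborting the whole batch, and the newline-joined return value matches the switch from outputs="text" to a multi-line gr.Textbox in this commit.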