Spaces:

akadriu
/

shqip_whisper

Sleeping

akadriu committed on Aug 20, 2024

Commit

192be9f

verified ·

1 Parent(s): e0c9526

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 from transformers import pipeline
 import gradio as gr
 # Fetch the token from the environment
 hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
@@ -9,34 +9,37 @@ model_id = "akadriu/whisper-medium-sq"  # update with your model id
 pipe = pipeline("automatic-speech-recognition", model=model_id, token=hf_token)
 def transcribe_speech(filepath):
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
             "task": "transcribe",
             "language": "albanian",
-        },  # update with the language you've fine-tuned on
         chunk_length_s=30,
         batch_size=8,
     )
     return output["text"]
-import gradio as gr
-demo = gr.Blocks()
 mic_transcribe = gr.Interface(
     fn=transcribe_speech,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
     outputs="text",
 )
 file_transcribe = gr.Interface(
     fn=transcribe_speech,
-    inputs=gr.Audio(sources="upload", type="filepath"),
     outputs="text",
 )
 with demo:
     gr.TabbedInterface(
         [mic_transcribe, file_transcribe],
@@ -44,3 +47,4 @@ with demo:
     )
 demo.launch(debug=True)

 import os
 from transformers import pipeline
 import gradio as gr
+import numpy as np
 # Fetch the token from the environment
 hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
 pipe = pipeline("automatic-speech-recognition", model=model_id, token=hf_token)
 def transcribe_speech(filepath):
+    # Fail fast when no audio was recorded or uploaded (Gradio passes None)
+    if filepath is None:
+        raise ValueError("No audio file provided.")
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
             "task": "transcribe",
             "language": "albanian",
+        },
         chunk_length_s=30,
         batch_size=8,
     )
     return output["text"]
+# Create Gradio interface
 mic_transcribe = gr.Interface(
     fn=transcribe_speech,
+    inputs=gr.Audio(source="microphone", type="filepath"),  # Removed plural from "sources"
     outputs="text",
 )
 file_transcribe = gr.Interface(
     fn=transcribe_speech,
+    inputs=gr.Audio(source="upload", type="filepath"),  # Removed plural from "sources"
     outputs="text",
 )
+demo = gr.Blocks()
 with demo:
     gr.TabbedInterface(
         [mic_transcribe, file_transcribe],
     )
 demo.launch(debug=True)