HarshitJoshi committed on
Commit
bd7df7f
1 Parent(s): 294ff0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -1,13 +1,19 @@
 
1
  from transformers import pipeline
2
  import gradio as gr
3
  import os
4
 
5
- model_id = "HarshitJoshi/whisper-small-Hindi"
6
- pipe = pipeline("automatic-speech-recognition", model=model_id)
 
 
 
 
 
7
 
8
- def transcribe_speech(audio):
9
  output = pipe(
10
- audio,
11
  max_new_tokens=256,
12
  generate_kwargs={
13
  "task": "transcribe",
@@ -19,36 +25,18 @@ def transcribe_speech(audio):
19
  return output["text"]
20
 
21
  example_folder = "./examples"
22
- example_files = [f for f in os.listdir(example_folder) if f.endswith('.wav') or f.endswith('.mp3')]
23
 
24
- def play_and_transcribe(filename):
25
- filepath = os.path.join(example_folder, filename)
26
- transcription = transcribe_speech(filepath)
27
- return filepath, transcription
28
-
29
- with gr.Blocks() as demo:
30
- gr.Markdown("# Hindi Speech Transcription")
31
-
32
- with gr.Tab("Transcribe"):
33
- audio_input = gr.Audio(type="filepath", label="Audio Input")
34
- transcribe_button = gr.Button("Transcribe")
35
- output_text = gr.Textbox(label="Transcription")
36
-
37
- transcribe_button.click(
38
- fn=transcribe_speech,
39
- inputs=audio_input,
40
- outputs=output_text
41
- )
42
-
43
- with gr.Tab("Examples"):
44
- example_dropdown = gr.Dropdown(choices=example_files, label="Select an example")
45
- example_audio = gr.Audio(label="Audio Playback")
46
- example_transcription = gr.Textbox(label="Transcription")
47
-
48
- example_dropdown.change(
49
- fn=play_and_transcribe,
50
- inputs=example_dropdown,
51
- outputs=[example_audio, example_transcription]
52
- )
53
 
54
  demo.launch(debug=True)
 
1
+ import torch
2
  from transformers import pipeline
3
  import gradio as gr
4
  import os
5
 
6
+ MODEL_NAME = "HarshitJoshi/whisper-small-Hindi"
7
+ device = 0 if torch.cuda.is_available() else "cpu"
8
+ pipe = pipeline(
9
+ task="automatic-speech-recognition",
10
+ model=MODEL_NAME,
11
+ device=device,
12
+ )
13
 
14
+ def transcribe_speech(filepath):
15
  output = pipe(
16
+ filepath,
17
  max_new_tokens=256,
18
  generate_kwargs={
19
  "task": "transcribe",
 
25
  return output["text"]
26
 
27
  example_folder = "./examples"
 
28
 
29
+ demo = gr.Interface(
30
+ fn=transcribe_speech,
31
+ inputs=gr.Audio(label="Audio Input", type="filepath"),
32
+ outputs=gr.Textbox(label="Transcription"),
33
+ title="Hindi Speech Transcription",
34
+ description=(
35
+ "Upload an audio file or record using your microphone to transcribe Hindi speech."
36
+ ),
37
+ examples=example_folder,
38
+ cache_examples=True,
39
+ allow_flagging="never",
40
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  demo.launch(debug=True)