Spaces:

Utpal21022102
/

audio3text

Runtime error

UtpaL2102 committed on Nov 30, 2024

Commit

ea23c90

1 Parent(s): ca6e3e7

Add application file

Files changed (1) hide show

app.py ADDED Viewed

+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import gradio as gr
+# Load model and processor
+model_id = "sanket003/whisper-darpg"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch.float32, low_cpu_mem_usage=False, use_safetensors=True
+)
+processor = AutoProcessor.from_pretrained(model_id)
+# Define the pipeline
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch.float32,
+    generate_kwargs={"language": "english"},
+    return_timestamps=True
+)
+# Define the Gradio interface function
+def transcribe_audio(audio, file):
+    if audio:
+        result = pipe(audio)
+    elif file:
+        result = pipe(file)
+    else:
+        result = {"text": "No input provided."}
+    return result["text"]
+# Gradio interface
+iface = gr.Interface(
+    title="Transforming Speech into Text",
+    fn=transcribe_audio,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", label="Record from Microphone"),
+        gr.File(type="filepath", label="Upload Audio File"),
+    ],
+    outputs=["textbox"],
+    description="Choose either microphone input or upload an audio file.",
+)
+# Run the app
+if __name__ == "__main__":
+    iface.launch(share=True, debug=True)