Spaces:

wjbmattingly
/

whisper-app

Sleeping

App Files Files Community

wjbmattingly committed on Aug 17, 2024

Commit

17abae8

1 Parent(s): 0873af8

changed app design

Browse files

Files changed (2) hide show

.DS_Store +0 -0
app.py +28 -30

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

app.py CHANGED Viewed

@@ -1,48 +1,46 @@
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
-import librosa
 import spaces
 # Load the model and processor
-model_name = "TheirStory/whisper-small-xhosa"
-processor = WhisperProcessor.from_pretrained(model_name)
-model = WhisperForConditionalGeneration.from_pretrained(model_name)
-@spaces.GPU
-def transcribe_audio(audio):
-    if torch.cuda.is_available():
-        model = model.to("cuda")
-    # Load the audio file
-    if isinstance(audio, str):  # If it's a file path
-        audio_array, sampling_rate = librosa.load(audio, sr=16000)
-    else:  # If it's a tuple (audio_array, sampling_rate)
-        audio_array, sampling_rate = audio
-    # Process the audio
-    input_features = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
-    if torch.cuda.is_available():
-        input_features = input_features.to("cuda")
-    # Generate token ids
-    generated_ids = model.generate(input_features)
-    # Decode token ids to text
-    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return transcription
-# Create the Gradio interface
-iface = gr.Interface(
-    fn=transcribe_audio,
     inputs=[
-        gr.Audio(type="filepath", label="Upload or Record an Audio File")
     ],
     outputs="text",
-    title="Xhosa Audio Transcription",
-    description="Record or upload Xhosa audio to get its transcription using the TheirStory/whisper-small-xhosa model."
 )
 # Launch the app

 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
+from transformers import pipeline
 import spaces
+# Value passed as `batch_size` to the pipeline call inside `transcribe`.
+BATCH_SIZE = 8
 # Load the model and processor
+# Hugging Face Hub id of the checkpoint; also interpolated into the UI description text.
+MODEL_NAME = "TheirStory/whisper-small-xhosa"
+# Use CUDA device index 0 when torch reports CUDA as available, otherwise run on CPU.
+device = 0 if torch.cuda.is_available() else "cpu"
+# Build the speech-recognition pipeline once at import time so every request reuses it.
+# NOTE(review): chunk_length_s=30 presumably makes the pipeline split long audio into
+# 30-second windows -- confirm against the Transformers ASR pipeline documentation.
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+@spaces.GPU
+def transcribe(inputs, task="transcribe"):
+    """Run the module-level ASR pipeline on one audio input and return its text.
+
+    Args:
+        inputs: Audio to transcribe, as handed over by the Gradio audio
+            component (a file path, since the component uses type="filepath").
+        task: Generation task forwarded to the model via ``generate_kwargs``.
+            Defaults to "transcribe" so the function can be called with only
+            the audio argument, which is what the Interface does while its
+            task selector is commented out.
+
+    Returns:
+        The "text" field of the pipeline result.
+
+    Raises:
+        gr.Error: If no audio was submitted (``inputs`` is None).
+    """
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    # Fall back to plain transcription if a caller passes an empty/None task.
+    result = pipe(
+        inputs,
+        batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task or "transcribe"},
+        return_timestamps=True,
+    )
+    return result["text"]
+# Gradio UI definition: a single audio input (delivered to `transcribe` as a file path)
+# and a plain-text output showing the transcription.
+file_transcribe = gr.Interface(
+    fn=transcribe,
     inputs=[
+        gr.Audio(type="filepath", label="Audio file"),
+        # NOTE(review): the task selector below is disabled, so this Interface supplies
+        # only the audio input to `fn`.
+        # gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
     ],
     outputs="text",
+    theme="huggingface",
+    title="Whisper App",
+    # The description is assembled from adjacent string literals; the middle one is an
+    # f-string that links to the checkpoint named by MODEL_NAME.
+    description=(
+        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
+        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
+        " of arbitrary length."
+    ),
+    allow_flagging="never",
 )
 # Launch the app