Spaces:
Sleeping
Sleeping
File size: 1,241 Bytes
eda98d9 1b6f227 6723651 0d320bd eda98d9 2834118 033e2bc 1b6f227 a878076 192be9f a878076 1b6f227 192be9f 1b6f227 a878076 1b6f227 a878076 e0c9526 1b6f227 a878076 2faa91c fa87838 192be9f 1b6f227 192be9f a878076 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import os
from transformers import pipeline
import gradio as gr
# Fetch the token from the environment
# None if HUGGINGFACE_HUB_TOKEN is unset; the pipeline then falls back to
# anonymous (unauthenticated) Hub access, which works for public models.
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
model_id = "akadriu/whisper-medium-sq" # update with your model id
#model_id ="./"
# Build the ASR pipeline once at import time; this downloads the model from
# the Hub (network I/O) the first time it runs.
pipe = pipeline("automatic-speech-recognition", model=model_id, token=hf_token)
def transcribe_speech(filepath):
    """Transcribe an audio file to Albanian text with the Whisper pipeline.

    Args:
        filepath: Path to the audio file handed over by the Gradio Audio
            widget (``type="filepath"``). May be ``None`` when the user
            submits without recording/uploading anything.

    Returns:
        The transcribed text string.

    Raises:
        ValueError: If no audio file was provided.
    """
    # Guard: Gradio passes None when the widget is empty.
    if filepath is None:
        raise ValueError("No audio file provided.")
    # Perform speech transcription.
    output = pipe(
        filepath,
        chunk_length_s=30,  # split long recordings into 30 s windows
        batch_size=8,       # decode up to 8 chunks per forward pass
        generate_kwargs={
            "task": "transcribe",
            "language": "albanian",
            # max_new_tokens is a generation parameter: passing it as a
            # top-level pipeline-call kwarg is deprecated in recent
            # transformers releases, so it goes in generate_kwargs.
            "max_new_tokens": 256,
        },
    )
    return output["text"]
# Build the two Gradio interfaces. Gradio 4.x replaced the removed `source=`
# argument with `sources=`; without it both tabs were identical, so the
# "Microphone" tab did not actually restrict input to the microphone.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
)
# Wrap both interfaces in a tabbed layout inside a Blocks container.
demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )
# debug=True surfaces tracebacks in the UI/console while developing.
demo.launch(debug=True)
|