import gradio as gr
import whisper
from transformers import BartForConditionalGeneration, BartTokenizer

# Summarization model: BART fine-tuned on CNN/DailyMail.
MODEL_NAME = "facebook/bart-large-cnn"
model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)

# Speech-to-text model: load Whisper once at startup instead of on every request.
# Other sizes ("tiny", "small", "medium", "large") trade speed for accuracy.
whisper_model = whisper.load_model("base")


def convert_and_summarize(audio_path: str) -> str:
    """Transcribe an audio file with Whisper, then summarize the transcript with BART."""
    # Speech-to-text
    result = whisper_model.transcribe(audio_path)
    transcribed_text = result["text"]

    # Summarization: BART accepts at most 1024 tokens, so longer transcripts are truncated.
    inputs = tokenizer([transcribed_text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary
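
# A minimal sketch of controlling summary length and beam width; these are
# standard Hugging Face generate() arguments, and the values below are
# illustrative assumptions rather than tuned settings:
#
#   summary_ids = model.generate(
#       inputs["input_ids"],
#       num_beams=4,
#       min_length=30,
#       max_length=150,
#       early_stopping=True,
#   )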

# Audio input component; type="filepath" hands the uploaded file's path to the function.
audio_input = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=convert_and_summarize,
    inputs=audio_input,
    outputs="text",
    title="Audio-to-Summarized-Text",
    description="Upload an audio file and get a concise summary of its content.",
    theme=gr.themes.Monochrome(),
    live=True,
)

iface.launch()
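
# Optional launch settings (a hedged sketch; share=True and server_name are
# standard launch() options for exposing the demo beyond localhost):
#
#   iface.launch(share=True, server_name="0.0.0.0")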