# Hugging Face Space (status shown on the hosting page: Sleeping)
import gradio as gr | |
import whisper | |
# Use a pipeline as a high-level helper | |
from transformers import pipeline | |
from datetime import timedelta | |
# Speech-to-text: name of the Whisper checkpoint to load, and the loaded model.
model_size = "medium"
model = whisper.load_model(model_size)

# Text summarization pipeline backed by the distilbart-cnn-12-6 checkpoint.
summarizer_pipe = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def format_seconds(seconds):
    """Format a duration given in seconds as an ``HH:MM:SS`` string.

    Fractional seconds are dropped. The hour field is not wrapped at 24, so
    durations of a day or more keep counting up (90000 seconds becomes
    ``"25:00:00"`` rather than ``"01:00:00"``).

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The zero-padded ``HH:MM:SS`` string, prefixed with ``-`` when the
        duration is negative by at least one whole second.
    """
    delta = timedelta(seconds=seconds)
    negative = delta < timedelta(0)
    magnitude = abs(delta)

    # ``timedelta.seconds`` only holds the within-day remainder; fold the
    # ``days`` component back in so long recordings do not wrap around.
    total = magnitude.days * 86400 + magnitude.seconds

    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    sign = "-" if negative and total else ""
    return f"{sign}{hours:02}:{minutes:02}:{secs:02}"
def process_meeting_video(mp3_path):
    """Transcribe an uploaded recording and summarize the transcript.

    Args:
        mp3_path: The uploaded file as delivered by the ``gr.File`` input.
            Accepted either as a filesystem path or as a file-like object
            that exposes its path through a ``name`` attribute.

    Returns:
        A ``(transcript_text, summary_output)`` tuple. ``transcript_text``
        holds one ``[HH:MM:SS->HH:MM:SS] text`` line per transcribed
        segment; ``summary_output`` is the summary of the full transcript,
        or an empty string when nothing was transcribed.
    """
    # Depending on the Gradio version, the File component hands over either
    # a path or a temp-file wrapper; the transcriber needs the path itself.
    if isinstance(mp3_path, str) or hasattr(mp3_path, "__fspath__"):
        audio_path = mp3_path
    else:
        audio_path = getattr(mp3_path, "name", mp3_path)

    transcription = model.transcribe(audio_path, word_timestamps=True)

    # format_seconds already renders a zero offset as "00:00:00", so the
    # first segment needs no special case.
    transcript_text = "".join(
        f"[{format_seconds(segment['start'])}->{format_seconds(segment['end'])}]"
        f" {segment['text']}\n"
        for segment in transcription["segments"]
    )

    full_text = transcription["text"]
    if not full_text.strip():
        # Nothing was recognized, so there is nothing to summarize.
        return transcript_text, ""

    # Meeting transcripts are often longer than the summarizer accepts;
    # truncate the input instead of failing on an over-long sequence.
    summary_output = summarizer_pipe(full_text, truncation=True)[0]["summary_text"]
    return transcript_text, summary_output
# Output widgets for the two values returned by process_meeting_video.
summary_box = gr.Textbox(label="Summary", lines=10)
transcription_box = gr.Textbox(label="Transcript", lines=30)

# Single-function UI: one uploaded file in, transcript and summary out.
demo = gr.Interface(
    fn=process_meeting_video,
    inputs=gr.File(),
    outputs=[transcription_box, summary_box],
)

# Only start the web server when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()