"""Gradio demo: transcribe an audio file with Whisper and summarize it.

The uploaded file is transcribed into timestamped segments, and the full
transcript text is condensed with a Hugging Face summarization pipeline.
"""

from datetime import timedelta

import gradio as gr
import whisper
# Use a pipeline as a high-level helper
from transformers import pipeline

model_size = "medium"
model = whisper.load_model(model_size)
summarizer_pipe = pipeline('summarization', model="sshleifer/distilbart-cnn-12-6")


def format_seconds(seconds):
    """Return *seconds* formatted as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are dropped. Hours are not wrapped at 24, so a
    duration of 25 hours is rendered as ``25:00:00``.

    Args:
        seconds: Duration in seconds (int or float), expected non-negative.

    Returns:
        The formatted ``HH:MM:SS`` string.
    """
    delta = timedelta(seconds=seconds)
    # ``delta.seconds`` alone excludes whole days, which would make any
    # duration of 24 hours or more wrap around to zero; fold days back in.
    total_seconds = delta.days * 86400 + delta.seconds
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return "{:02}:{:02}:{:02}".format(hours, minutes, secs)


def process_meeting_video(mp3_path):
    """Transcribe an audio file and summarize the transcript.

    Args:
        mp3_path: Path of the audio file to transcribe, or an object that
            exposes the path through a ``name`` attribute.

    Returns:
        A ``(transcript_text, summary_output)`` tuple. ``transcript_text``
        has one ``[HH:MM:SS->HH:MM:SS] text`` line per transcribed segment;
        ``summary_output`` is the summary of the full transcript, or an
        empty string when nothing was transcribed.
    """
    # NOTE(review): depending on the Gradio version, gr.File() appears to
    # deliver either a plain path or a temp-file wrapper whose ``name`` is
    # the path -- accept both; confirm against the installed version.
    audio_path = getattr(mp3_path, "name", mp3_path)

    transcription = model.transcribe(audio_path, word_timestamps=True)

    lines = [
        f"[{format_seconds(segment['start'])}->{format_seconds(segment['end'])}]"
        f" {segment['text']}\n"
        for segment in transcription["segments"]
    ]
    transcript_text = "".join(lines)

    full_text = transcription["text"]
    if full_text.strip():
        # Meeting transcripts easily exceed the summarization model's
        # maximum input length; truncate instead of failing on long input.
        summary_output = summarizer_pipe(full_text, truncation=True)[0]['summary_text']
    else:
        # Nothing was transcribed, so there is nothing to summarize.
        summary_output = ""

    return transcript_text, summary_output


summary_box = gr.Textbox(label="Summary", lines=10)
transcription_box = gr.Textbox(label="Transcript", lines=30)

demo = gr.Interface(
    process_meeting_video,
    inputs=gr.File(),
    outputs=[transcription_box, summary_box],
)

if __name__ == "__main__":
    demo.launch()