Spaces:

tirtohadi
/

EwMeetingNotes

Sleeping

App Files Community

EwMeetingNotes / app.py

tirtohadi

Upload folder using huggingface_hub

a146ed0 10 months ago

raw

history blame

1.91 kB

	import gradio as gr
	import whisper
	# Use a pipeline as a high-level helper
	from transformers import pipeline
	from datetime import timedelta

	# Whisper checkpoint name; the speech-to-text model is loaded once at import time.
	model_size = "medium"
	model = whisper.load_model(name=model_size)

	# Summarisation pipeline, also created once at import time and reused per request.
	summarizer_pipe = pipeline(
	    task="summarization",
	    model="sshleifer/distilbart-cnn-12-6",
	)

	def format_seconds(seconds):
	    """Format a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

	    Args:
	        seconds: Duration in seconds (int or float). Any fractional part is
	            dropped. Intended for non-negative durations.

	    Returns:
	        A string such as ``"00:01:05"``. Durations of 24 hours or more keep
	        counting hours (``"25:00:00"``) instead of wrapping back to zero.
	    """
	    delta = timedelta(seconds=seconds)

	    # ``timedelta.seconds`` only holds the within-day remainder, so the whole
	    # days have to be folded back in to avoid wrapping at 24 hours.
	    whole_seconds = delta.days * 86400 + delta.seconds

	    hours, remainder = divmod(whole_seconds, 3600)
	    minutes, secs = divmod(remainder, 60)

	    return f"{hours:02}:{minutes:02}:{secs:02}"


	def process_meeting_video(mp3_path):
	    """Transcribe an uploaded recording and summarise the transcript.

	    Args:
	        mp3_path: Value delivered by the ``gr.File`` input. A path string is
	            used as-is; any other object is unwrapped through its ``name``
	            attribute when it has one. ``None`` means nothing was uploaded.

	    Returns:
	        A ``(transcript_text, summary_output)`` tuple. ``transcript_text`` has
	        one ``[HH:MM:SS->HH:MM:SS] text`` line per transcribed segment, and
	        ``summary_output`` is the summariser's ``summary_text``. Both are empty
	        strings when no file was supplied; the summary is empty when the
	        transcription contains no text.
	    """
	    if mp3_path is None:
	        # Nothing uploaded: return empty outputs instead of failing in transcribe().
	        return "", ""

	    # NOTE(review): presumably some Gradio releases hand over a temp-file
	    # wrapper whose ``.name`` is the on-disk path rather than a plain string;
	    # confirm against the installed Gradio version.
	    if isinstance(mp3_path, str):
	        audio_path = mp3_path
	    else:
	        audio_path = getattr(mp3_path, "name", mp3_path)

	    transcription = model.transcribe(audio_path, word_timestamps=True)

	    # format_seconds(0) already yields "00:00:00", so no special case is
	    # needed for the first segment.
	    transcript_text = "".join(
	        f"[{format_seconds(segment['start'])}->{format_seconds(segment['end'])}]"
	        f" {segment['text']}\n"
	        for segment in transcription["segments"]
	    )

	    full_text = transcription["text"]
	    if not full_text.strip():
	        # Nothing to summarise.
	        return transcript_text, ""

	    # truncation=True keeps long transcripts within the summariser's maximum
	    # input length instead of failing on over-long input.
	    summary_output = summarizer_pipe(full_text, truncation=True)[0]["summary_text"]
	    return transcript_text, summary_output

	# Output widgets: the summary box is created first, the transcript box second.
	summary_box = gr.Textbox(lines=10, label="Summary")
	transcription_box = gr.Textbox(lines=30, label="Transcript")

	# Single-function interface: one uploaded file in, transcript and summary out.
	demo = gr.Interface(
	    fn=process_meeting_video,
	    inputs=gr.File(),
	    outputs=[transcription_box, summary_box],
	)

	if __name__ == "__main__":
	    demo.launch()