Spaces:

cfc-tech
/

youtube_summarizer

Sleeping

45c48be verified over 1 year ago

2.68 kB

import os
import tempfile

import librosa
from moviepy.editor import *  # wildcard kept from the original; provides AudioFileClip
from pytube import YouTube
import streamlit as st
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, Wav2Vec2Processor, pipeline, logging

	# Streamlit app: download a YouTube video's audio, transcribe it, summarize the transcript.

# Each stage loads a checkpoint trained for that task: wav2vec2 for speech
# recognition, BART for summarization.
ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"
SUMMARIZER_CHECKPOINT = "facebook/bart-large-cnn"

SAMPLE_RATE = 16000  # audio is resampled to this rate before recognition
ASR_CHUNK_SECONDS = 30  # bounds memory use per forward pass on long videos
MIN_CHUNK_SECONDS = 0.1  # shorter trailing fragments are skipped (see transcribe)
MIN_TRANSCRIPT_CHARS = 50  # transcripts at or below this length are not summarized


def wav_path_for(download_path: str) -> str:
    """Return a ``.wav`` path beside *download_path* that never equals it.

    Only the final extension is swapped, so a ``.webm`` or ``.m4a`` download
    works as well as ``.mp4``, and an ``.mp4`` substring elsewhere in the path
    is left alone. If the download is already a ``.wav`` file, a suffix is
    added so the converter does not overwrite its own input.
    """
    stem, ext = os.path.splitext(download_path)
    if ext.lower() == ".wav":
        stem += "_converted"
    return stem + ".wav"


def split_into_chunks(samples, chunk_size: int) -> list:
    """Return consecutive slices of *samples*, each at most *chunk_size* long.

    Raises:
        ValueError: if *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    return [samples[i:i + chunk_size] for i in range(0, len(samples), chunk_size)]


def download_audio(url: str, output_dir: str) -> str:
    """Download the first audio-only stream of *url* into *output_dir*.

    Returns the path of the downloaded file.

    Raises:
        ValueError: if the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream is available for this video.")
    return stream.download(output_path=output_dir)


def extract_wav(download_path: str) -> str:
    """Convert the downloaded media file to WAV and return the WAV path."""
    wav_path = wav_path_for(download_path)
    clip = AudioFileClip(download_path)
    try:
        clip.write_audiofile(wav_path)
    finally:
        # Release the reader even if the conversion fails.
        clip.close()
    return wav_path


def transcribe(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* and return the text.

    The waveform is processed in fixed-size chunks so memory use does not
    grow with the length of the video. Chunk boundaries are not aligned to
    pauses, so a word straddling a boundary may be transcribed imperfectly.
    """
    # The processor bundles the feature extractor (for the raw waveform) and
    # the CTC tokenizer (for decoding); the tokenizer alone cannot encode audio.
    processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
    model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT)

    speech, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
    min_samples = int(MIN_CHUNK_SECONDS * SAMPLE_RATE)

    pieces = []
    for chunk in split_into_chunks(speech, ASR_CHUNK_SECONDS * SAMPLE_RATE):
        # A fragment this short holds no usable speech and may be too short
        # for the model's convolutional front end to produce any frames.
        if len(chunk) < min_samples:
            continue
        inputs = processor(chunk, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        with torch.no_grad():  # inference only; skip gradient bookkeeping
            logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        pieces.append(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])

    return " ".join(piece for piece in pieces if piece).strip()


def summarize(text: str) -> str:
    """Return an abstractive summary of *text*.

    Input longer than the model's context window is truncated, so for very
    long transcripts only the beginning is summarized.
    """
    summarizer = pipeline("summarization", model=SUMMARIZER_CHECKPOINT)
    result = summarizer(text, max_length=130, min_length=30, do_sample=False, truncation=True)
    return result[0]['summary_text']


def main() -> None:
    """Render the page and run the pipeline when the user clicks the button."""
    # Suppress warnings from transformers
    logging.set_verbosity_error()

    # Streamlit interface setup
    st.title("YouTube Video Summarizer")
    youtube_link = st.text_input("Enter YouTube Video Link:")

    if not st.button('Summarize'):
        return
    if not youtube_link.strip():
        st.warning("Please enter a valid YouTube link.")
        return

    with st.spinner("Processing..."):
        try:
            # One bar updated in place; calling st.progress repeatedly would
            # render a new bar for every stage.
            progress_bar = st.progress(0)

            # Media files live in a temporary directory so nothing is left
            # behind once the transcript has been produced.
            with tempfile.TemporaryDirectory() as workdir:
                download_path = download_audio(youtube_link.strip(), workdir)
                progress_bar.progress(25)

                audio_path = extract_wav(download_path)
                progress_bar.progress(50)

                transcription = transcribe(audio_path)

            if len(transcription) <= MIN_TRANSCRIPT_CHARS:
                st.error("Transcription result is empty, too short, or not a string.")
                return
            progress_bar.progress(75)

            summary = summarize(transcription)
            progress_bar.progress(100)

            st.success("Done!")
            st.write("### Summary:")
            st.write(summary)
        except Exception as e:
            # Top-level UI boundary: report the failure in the page instead of
            # showing a raw traceback.
            st.error(f"An error occurred: {e}")


# Streamlit executes the script with __name__ set to "__main__".
if __name__ == "__main__":
    main()