import argparse import os import tempfile import time from functools import wraps from shutil import rmtree import openai import streamlit as st from audio_to_text import transcribe_audio from text_summary import (align_chapters, get_automatic_chapters, summarize_chapters) from youtube_extraction import get_youtube_chapters, youtube_to_audio def timing_decorator(message): def decorator(func): @wraps(func) def wrapper(*args, **kwargs): with st.spinner(message): start_time = time.time() result = func(*args, **kwargs) end_time = time.time() st.write(f"{message} complete - {end_time - start_time:.2f}s") return result return wrapper return decorator @timing_decorator("Downloading Youtube video") def download_youtube(youtube_url, work_dir): audio_fpath = youtube_to_audio(youtube_url, work_dir) # Get Youtube chapters, return empty list if is not in metadata yt_chapters = get_youtube_chapters(youtube_url) return audio_fpath, yt_chapters @timing_decorator("Transcribing audio") def audio_to_text(audio_fpath): # Transcribe video with Whisper timestamped_text = transcribe_audio(audio_fpath) return timestamped_text @timing_decorator("Retrieving chapters") def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key): # Get chapters if len(yt_chapters) == 0: chapters = get_automatic_chapters(timestamped_text, openai_api_key) else: chapters = align_chapters(timestamped_text, yt_chapters) return chapters @timing_decorator("Summarizing video") def summarize_youtube_chapters(chapters, openai_api_key): # Summarize chapters summarized_chapters, overall_summary = summarize_chapters(chapters, openai_api_key) return summarized_chapters, overall_summary def get_work_dir(): temp_dir = tempfile.TemporaryDirectory() work_dir = temp_dir.name return work_dir def convert_seconds(seconds): hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) seconds = int((seconds % 3600) % 60) return f"{hours:02d}:{minutes:02d}:{seconds:02d}" def summarize_video(youtube_url): st.video(youtube_url) # Create a temporary directory to store the audio file work_dir = get_work_dir() # Summarize the video audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir) timestamped_text = audio_to_text(audio_fpath) chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key) summarized_chapters, overall_summary = summarize_youtube_chapters( chapters, openai.api_key ) st.write(f"**TLDR:** {overall_summary}") for summarized_chapter in summarized_chapters: start_time = convert_seconds(summarized_chapter["start"]) end_time = convert_seconds(summarized_chapter["end"]) timestamp = f"{start_time} - {end_time}" title = summarized_chapter["title"] summary = summarized_chapter["summary"] transcript = summarized_chapter["text"] # Display the hyperlink with timestamp and title hyperlink = ( f"[{timestamp} - {title}]({youtube_url}&t={summarized_chapter['start']}s)" ) st.markdown(hyperlink, unsafe_allow_html=True) st.write(f'Summary: {summary}') # Use an expander for the transcript with st.expander("Show Transcript"): st.write(transcript) rmtree(work_dir) def app(): st.title("Video Summarizer") openai.api_key = os.environ.get("OPENAI_API_KEY") if openai.api_key is None: openai.api_key = st.text_input("OPENAI_API_KEY") youtube_url = st.text_input("Enter a YouTube URL") # Add summarize button summarize_button = st.button("Summarize") if summarize_button: summarize_video(youtube_url) if __name__ == "__main__": app()