Spaces:
Sleeping
Sleeping
import argparse | |
import os | |
import tempfile | |
import time | |
from functools import wraps | |
from shutil import rmtree | |
import openai | |
import streamlit as st | |
from audio_to_text import transcribe_audio | |
from text_summary import (align_chapters, get_automatic_chapters, | |
summarize_chapters) | |
from youtube_extraction import get_youtube_chapters, youtube_to_audio | |
def timing_decorator(message):
    """Build a decorator that shows a spinner and reports elapsed time.

    While the wrapped callable runs, a Streamlit spinner labelled with
    ``message`` is displayed. Once it finishes, a line of the form
    ``"<message> complete - <seconds>s"`` is written to the page.

    Args:
        message: Label used for the spinner and for the completion line.

    Returns:
        A decorator. The wrapper it produces forwards all positional and
        keyword arguments and returns the wrapped callable's result as is.
    """

    def decorator(func):
        # wraps() copies __name__/__doc__ onto the wrapper so introspection
        # and tracebacks still refer to the decorated function.
        @wraps(func)
        def wrapper(*args, **kwargs):
            with st.spinner(message):
                # perf_counter() is monotonic, so the measured duration is
                # not distorted by system clock adjustments.
                started = time.perf_counter()
                result = func(*args, **kwargs)
                elapsed = time.perf_counter() - started
                st.write(f"{message} complete - {elapsed:.2f}s")
            return result

        return wrapper

    return decorator
def download_youtube(youtube_url, work_dir):
    """Fetch the audio and the chapter metadata for a YouTube video.

    Args:
        youtube_url: URL of the video.
        work_dir: Directory handed to ``youtube_to_audio`` for the download.

    Returns:
        A ``(audio_fpath, yt_chapters)`` tuple: the value returned by
        ``youtube_to_audio`` and the value returned by
        ``get_youtube_chapters``.
    """
    # NOTE(review): the chapter lookup is expected to yield an empty list
    # when the video metadata carries no chapters - confirm in
    # youtube_extraction.
    return (
        youtube_to_audio(youtube_url, work_dir),
        get_youtube_chapters(youtube_url),
    )
def audio_to_text(audio_fpath):
    """Transcribe the audio file at ``audio_fpath``.

    Returns whatever ``transcribe_audio`` produces for that file.
    """
    # NOTE(review): presumably Whisper-based and timestamped - confirm in
    # audio_to_text.transcribe_audio.
    return transcribe_audio(audio_fpath)
def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
    """Pick the chapter source for a transcript.

    Args:
        timestamped_text: Transcript passed through to the chapter helpers.
        yt_chapters: Chapters taken from the video; may be empty.
        openai_api_key: Key forwarded to ``get_automatic_chapters``.

    Returns:
        The result of ``align_chapters`` when ``yt_chapters`` has entries,
        otherwise the result of ``get_automatic_chapters``.
    """
    # Chapters supplied with the video take precedence over generated ones.
    if len(yt_chapters) > 0:
        return align_chapters(timestamped_text, yt_chapters)
    return get_automatic_chapters(timestamped_text, openai_api_key)
def summarize_youtube_chapters(chapters, openai_api_key):
    """Summarize the given chapters via ``summarize_chapters``.

    Returns:
        A ``(summarized_chapters, overall_summary)`` tuple, in the order
        ``summarize_chapters`` yields them.
    """
    chapter_summaries, tldr = summarize_chapters(chapters, openai_api_key)
    return chapter_summaries, tldr
def get_work_dir():
    """Create a fresh temporary directory and return its path.

    ``tempfile.mkdtemp`` is used rather than ``tempfile.TemporaryDirectory``:
    a ``TemporaryDirectory`` object removes its directory as soon as the
    object is finalized, which would happen when this function returns and
    would leave the caller holding a path that no longer exists.

    Returns:
        Absolute path of the new directory. The caller is responsible for
        removing it (for example with ``shutil.rmtree``).
    """
    return tempfile.mkdtemp()
def convert_seconds(seconds):
    """Format a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    Each component is converted with ``int`` after the split, so a
    fractional part of the input is dropped. Hours are not wrapped at 24.
    """
    whole_hours, remainder = divmod(seconds, 3600)
    whole_minutes, leftover = divmod(remainder, 60)
    return f"{int(whole_hours):02d}:{int(whole_minutes):02d}:{int(leftover):02d}"
def _chapter_url(youtube_url, start_seconds):
    """Return ``youtube_url`` with a ``t=<whole seconds>s`` parameter appended."""
    # Short links (youtu.be/<id>) carry no query string, so the parameter
    # must be introduced with "?" instead of "&".
    separator = "&" if "?" in youtube_url else "?"
    return f"{youtube_url}{separator}t={int(start_seconds)}s"


def summarize_video(youtube_url):
    """Run the summarization pipeline for one video and render the result.

    Embeds the video, downloads its audio into a temporary working
    directory, transcribes it, derives chapters, summarizes them, and then
    writes the overall summary followed by one entry per chapter: a
    timestamped link, the chapter summary, and the transcript inside an
    expander.

    Args:
        youtube_url: URL of the video to summarize.

    The working directory is removed when the function exits, including
    when one of the pipeline steps raises.
    """
    st.video(youtube_url)

    # Temporary directory that holds the downloaded audio file.
    work_dir = get_work_dir()
    try:
        audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
        timestamped_text = audio_to_text(audio_fpath)
        chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
        summarized_chapters, overall_summary = summarize_youtube_chapters(
            chapters, openai.api_key
        )

        st.write(f"**TLDR:** {overall_summary}")

        for summarized_chapter in summarized_chapters:
            start_time = convert_seconds(summarized_chapter["start"])
            end_time = convert_seconds(summarized_chapter["end"])
            timestamp = f"{start_time} - {end_time}"
            title = summarized_chapter["title"]
            summary = summarized_chapter["summary"]
            transcript = summarized_chapter["text"]

            # Link that jumps to the chapter start. Raw HTML rendering is
            # deliberately not enabled: a markdown link does not need it and
            # the title text comes from outside this application.
            link_target = _chapter_url(youtube_url, summarized_chapter["start"])
            st.markdown(f"[{timestamp} - {title}]({link_target})")

            st.write(f'Summary: {summary}')

            # Keep the full transcript collapsed by default.
            with st.expander("Show Transcript"):
                st.write(transcript)
    finally:
        # ignore_errors: cleanup must not mask an exception from the
        # pipeline, and the directory may already be gone.
        rmtree(work_dir, ignore_errors=True)
def app():
    """Render the page: title, API-key prompt, URL input and Summarize button.

    The OpenAI key is read from the ``OPENAI_API_KEY`` environment variable.
    If it is unset or empty, a masked text input asks for it. Pressing
    "Summarize" runs ``summarize_video`` only when both a key and a URL are
    present; otherwise a message is shown instead.
    """
    st.title("Video Summarizer")

    openai.api_key = os.environ.get("OPENAI_API_KEY")
    # "not" rather than "is None": an environment variable set to an empty
    # string should also trigger the prompt.
    if not openai.api_key:
        # type="password" masks the key instead of echoing it on screen.
        openai.api_key = st.text_input("OPENAI_API_KEY", type="password")

    youtube_url = st.text_input("Enter a YouTube URL")

    # Add summarize button
    summarize_button = st.button("Summarize")
    if not summarize_button:
        return

    # Validate inputs up front so the user gets a clear message rather than
    # a failure from deep inside the download/summarization pipeline.
    if not openai.api_key:
        st.error("Please provide an OpenAI API key.")
        return

    cleaned_url = (youtube_url or "").strip()
    if not cleaned_url:
        st.warning("Please enter a YouTube URL.")
        return

    summarize_video(cleaned_url)
# Launch the page only when this file is executed as the main script.
if __name__ == "__main__":
    app()