# Hugging Face Space: ammansik/youtube_summarizer (status: Sleeping)
# File size: 3,964 Bytes

import argparse
import os
import tempfile
import time
from functools import wraps
from shutil import rmtree

import openai
import streamlit as st

from audio_to_text import transcribe_audio
from text_summary import (align_chapters, get_automatic_chapters,
                          summarize_chapters)
from youtube_extraction import get_youtube_chapters, youtube_to_audio


def timing_decorator(message):
    """Build a decorator that shows a Streamlit spinner and reports elapsed time.

    While the decorated function runs, ``message`` is displayed in an
    ``st.spinner``. Once it returns, a line of the form
    ``"<message> complete - <seconds>s"`` is written to the page. If the
    decorated function raises, nothing is written and the exception propagates.

    Args:
        message: Text shown in the spinner and used as the prefix of the
            completion line.

    Returns:
        A decorator that wraps a callable and returns its result unchanged.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            with st.spinner(message):
                # perf_counter is monotonic, so the reported duration cannot be
                # skewed (or go negative) when the system clock is adjusted.
                started = time.perf_counter()
                result = func(*args, **kwargs)
                elapsed = time.perf_counter() - started
                st.write(f"{message} complete - {elapsed:.2f}s")
                return result

        return wrapper

    return decorator


@timing_decorator("Downloading Youtube video")
def download_youtube(youtube_url, work_dir):
    """Fetch the audio for a video and look up its YouTube chapters.

    Args:
        youtube_url: URL of the video, passed to both helpers.
        work_dir: Directory handed to ``youtube_to_audio``.

    Returns:
        A tuple ``(audio_fpath, yt_chapters)``: the value returned by
        ``youtube_to_audio`` and the value returned by
        ``get_youtube_chapters`` (expected to be an empty list when the video
        metadata has no chapters).
    """
    # Tuple elements are evaluated left to right, so the audio is fetched
    # before the chapter metadata is requested.
    return youtube_to_audio(youtube_url, work_dir), get_youtube_chapters(youtube_url)


@timing_decorator("Transcribing audio")
def audio_to_text(audio_fpath):
    """Transcribe the audio file at ``audio_fpath`` (Whisper-based).

    Returns:
        Whatever ``transcribe_audio`` produces for the file; the rest of this
        module treats it as timestamped text.
    """
    return transcribe_audio(audio_fpath)


@timing_decorator("Retrieving chapters")
def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
    """Produce chapters for a transcript.

    Args:
        timestamped_text: Transcript passed on to the chapter helpers.
        yt_chapters: Chapters taken from the YouTube metadata; may be empty.
        openai_api_key: Key forwarded to ``get_automatic_chapters`` when
            chapters have to be generated.

    Returns:
        The result of ``align_chapters`` when ``yt_chapters`` is non-empty,
        otherwise the result of ``get_automatic_chapters``.
    """
    has_youtube_chapters = len(yt_chapters) != 0
    if has_youtube_chapters:
        # The video already defines chapters: align the transcript to them.
        return align_chapters(timestamped_text, yt_chapters)
    # No chapter metadata: derive chapters automatically.
    return get_automatic_chapters(timestamped_text, openai_api_key)


@timing_decorator("Summarizing video")
def summarize_youtube_chapters(chapters, openai_api_key):
    """Summarize every chapter and the video as a whole.

    Args:
        chapters: Chapters to summarize, passed to ``summarize_chapters``.
        openai_api_key: Key forwarded to ``summarize_chapters``.

    Returns:
        A tuple ``(summarized_chapters, overall_summary)`` unpacked from the
        result of ``summarize_chapters``.
    """
    per_chapter, whole_video = summarize_chapters(chapters, openai_api_key)
    return per_chapter, whole_video


def get_work_dir():
    """Create a fresh temporary working directory and return its path.

    ``tempfile.mkdtemp`` is used instead of ``tempfile.TemporaryDirectory``:
    a ``TemporaryDirectory`` object deletes its directory as soon as the object
    is finalized, which would happen when this function returns and leave the
    caller with a path that no longer exists.

    Returns:
        Absolute path (``str``) of a newly created directory. The caller owns
        the directory and is responsible for removing it.
    """
    return tempfile.mkdtemp()


def convert_seconds(seconds):
    """Format a duration given in seconds as ``HH:MM:SS``.

    Fractional seconds are truncated. Hours are not wrapped at 24, so long
    durations simply produce a wider hours field.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    whole_minutes, within_minute = divmod(within_hour, 60)
    return f"{int(whole_hours):02d}:{int(whole_minutes):02d}:{int(within_minute):02d}"


def summarize_video(youtube_url):
    """Run the full summarization pipeline for one video and render the result.

    The video is embedded, its audio is downloaded into a temporary working
    directory and transcribed, chapters are retrieved and summarized, and the
    overall summary plus one section per chapter are written to the page.

    Each summarized chapter is read as a mapping with the keys ``"start"`` and
    ``"end"`` (seconds), ``"title"``, ``"summary"`` and ``"text"``.

    Args:
        youtube_url: URL of the video to summarize.
    """
    st.video(youtube_url)
    # Create a temporary directory to store the audio file
    work_dir = get_work_dir()

    try:
        # Summarize the video
        audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
        timestamped_text = audio_to_text(audio_fpath)

        chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
        summarized_chapters, overall_summary = summarize_youtube_chapters(
            chapters, openai.api_key
        )

        st.write(f"**TLDR:** {overall_summary}")

        # A URL without a query string (e.g. a youtu.be short link) needs "?"
        # rather than "&" in front of the first parameter.
        separator = "&" if "?" in youtube_url else "?"

        for summarized_chapter in summarized_chapters:
            start_time = convert_seconds(summarized_chapter["start"])
            end_time = convert_seconds(summarized_chapter["end"])

            timestamp = f"{start_time} - {end_time}"
            title = summarized_chapter["title"]
            summary = summarized_chapter["summary"]
            transcript = summarized_chapter["text"]

            # Display the hyperlink with timestamp and title. The offset is
            # truncated to whole seconds so a float start does not yield a
            # value such as "t=12.3s".
            start_offset = int(summarized_chapter["start"])
            hyperlink = (
                f"[{timestamp} - {title}]"
                f"({youtube_url}{separator}t={start_offset}s)"
            )
            # Plain Markdown is enough for a link; raw HTML stays disabled
            # because the title is external text.
            st.markdown(hyperlink)

            st.write(f'Summary: {summary}')
            # Use an expander for the transcript
            with st.expander("Show Transcript"):
                st.write(transcript)
    finally:
        # Always remove the working directory, including when a pipeline step
        # raises; ignore_errors covers a directory that is already gone.
        rmtree(work_dir, ignore_errors=True)


def app():
    """Render the Streamlit page and start a summarization when requested.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable; when that is unset or empty, the user is asked for it in a
    masked input field. Pressing "Summarize" with a non-empty URL runs
    ``summarize_video``; with an empty URL a warning is shown instead.
    """
    st.title("Video Summarizer")
    openai.api_key = os.environ.get("OPENAI_API_KEY")
    if not openai.api_key:
        # type="password" keeps the secret from being displayed in clear text.
        openai.api_key = st.text_input("OPENAI_API_KEY", type="password")

    youtube_url = st.text_input("Enter a YouTube URL").strip()

    # Add summarize button
    summarize_button = st.button("Summarize")

    if not summarize_button:
        return

    if not youtube_url:
        # Avoid starting the download/transcription pipeline without a URL.
        st.warning("Please enter a YouTube URL.")
        return

    summarize_video(youtube_url)


# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()