File size: 3,964 Bytes
e3d3533
 
 
 
 
 
 
dd1ba34
e3d3533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd1ba34
e3d3533
 
dd1ba34
e3d3533
 
 
 
 
 
dd1ba34
e3d3533
2458b22
 
e3d3533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd1ba34
 
 
 
e3d3533
 
 
 
 
 
 
2458b22
e3d3533
 
 
2458b22
e3d3533
 
 
 
 
 
 
2458b22
 
 
 
e3d3533
 
 
 
 
ffb62ba
dd1ba34
 
 
e3d3533
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import argparse
import os
import tempfile
import time
from functools import wraps
from shutil import rmtree

import openai
import streamlit as st

from audio_to_text import transcribe_audio
from text_summary import (align_chapters, get_automatic_chapters,
                          summarize_chapters)
from youtube_extraction import get_youtube_chapters, youtube_to_audio


def timing_decorator(message):
    """Build a decorator that shows a spinner and reports elapsed time.

    While the wrapped function runs, a Streamlit spinner labelled with
    ``message`` is displayed. When the function returns, a line of the form
    ``"<message> complete - <seconds>s"`` is written to the page. If the
    wrapped function raises, the exception propagates and no completion line
    is written.

    Args:
        message: Text used for both the spinner label and the completion line.

    Returns:
        A decorator that preserves the wrapped function's metadata and
        return value.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            with st.spinner(message):
                # perf_counter is monotonic, so the reported duration cannot
                # go negative or jump if the system clock is adjusted mid-run.
                started = time.perf_counter()
                result = func(*args, **kwargs)
                elapsed = time.perf_counter() - started
                st.write(f"{message} complete - {elapsed:.2f}s")
                return result

        return wrapper

    return decorator


@timing_decorator("Downloading Youtube video")
def download_youtube(youtube_url, work_dir):
    """Fetch the audio track and the chapter metadata for a video.

    Args:
        youtube_url: URL of the video to process.
        work_dir: Directory handed to ``youtube_to_audio`` for its output.

    Returns:
        A ``(audio_path, chapters)`` tuple: the value returned by
        ``youtube_to_audio`` and the value returned by
        ``get_youtube_chapters``.
    """
    audio_path = youtube_to_audio(youtube_url, work_dir)
    # NOTE(review): the chapter list is expected to be empty when the video
    # metadata carries no chapters -- confirm against get_youtube_chapters.
    return audio_path, get_youtube_chapters(youtube_url)


@timing_decorator("Transcribing audio")
def audio_to_text(audio_fpath):
    """Transcribe the audio file at ``audio_fpath``.

    Delegates to ``transcribe_audio`` and returns its result unchanged.
    """
    return transcribe_audio(audio_fpath)


@timing_decorator("Retrieving chapters")
def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
    """Produce chapters for a transcript.

    Args:
        timestamped_text: Transcript passed on to the chapter helpers.
        yt_chapters: Chapters taken from the video metadata; may be empty.
        openai_api_key: Key forwarded to ``get_automatic_chapters`` when
            chapters have to be generated.

    Returns:
        The result of ``align_chapters`` when ``yt_chapters`` is non-empty,
        otherwise the result of ``get_automatic_chapters``.
    """
    # Chapters supplied with the video take precedence over generated ones.
    if len(yt_chapters) != 0:
        return align_chapters(timestamped_text, yt_chapters)
    return get_automatic_chapters(timestamped_text, openai_api_key)


@timing_decorator("Summarizing video")
def summarize_youtube_chapters(chapters, openai_api_key):
    """Summarize each chapter and the video as a whole.

    Args:
        chapters: Chapters to summarize, forwarded to ``summarize_chapters``.
        openai_api_key: Key forwarded to ``summarize_chapters``.

    Returns:
        A ``(per_chapter, tldr)`` tuple unpacked from the two values that
        ``summarize_chapters`` returns.
    """
    per_chapter, tldr = summarize_chapters(chapters, openai_api_key)
    return per_chapter, tldr


def get_work_dir():
    """Create a fresh temporary directory and return its path.

    ``tempfile.mkdtemp`` is used instead of ``tempfile.TemporaryDirectory``
    because the latter removes the directory as soon as its object is
    garbage-collected, which would happen the moment this function returns
    and leave the caller with a path that no longer exists.

    Returns:
        Absolute path of the new directory. The caller owns it and is
        responsible for deleting it when done.
    """
    return tempfile.mkdtemp()


def convert_seconds(seconds):
    """Format a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated. The hours field is not capped at 24,
    so long durations simply yield a wider hours component.
    """
    whole_hours, remainder = divmod(seconds, 3600)
    whole_minutes, leftover = divmod(remainder, 60)
    hh, mm, ss = int(whole_hours), int(whole_minutes), int(leftover)
    return f"{hh:02d}:{mm:02d}:{ss:02d}"


def summarize_video(youtube_url):
    """Run the full summarization pipeline for a video and render the result.

    The video is embedded, its audio is downloaded into a temporary work
    directory and transcribed, chapters are retrieved and summarized, and
    the overall summary plus one section per chapter are written to the page.

    Each summarized chapter is read as a mapping with the keys ``"start"``,
    ``"end"``, ``"title"``, ``"summary"`` and ``"text"``; ``"start"`` and
    ``"end"`` are treated as numeric second offsets.

    Args:
        youtube_url: URL of the video to summarize.
    """
    st.video(youtube_url)
    # Create a temporary directory to store the audio file
    work_dir = get_work_dir()

    try:
        # Summarize the video
        audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
        timestamped_text = audio_to_text(audio_fpath)

        chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
        summarized_chapters, overall_summary = summarize_youtube_chapters(
            chapters, openai.api_key
        )
    finally:
        # Always remove the work directory, even when a pipeline step fails;
        # ignore_errors keeps a missing directory from masking that failure.
        rmtree(work_dir, ignore_errors=True)

    st.write(f"**TLDR:** {overall_summary}")

    # URLs without a query string (e.g. https://youtu.be/<id>) need "?" to
    # start one; appending "&t=" to them would produce a broken link.
    separator = "&" if "?" in youtube_url else "?"

    for summarized_chapter in summarized_chapters:
        start_time = convert_seconds(summarized_chapter["start"])
        end_time = convert_seconds(summarized_chapter["end"])

        timestamp = f"{start_time} - {end_time}"
        title = summarized_chapter["title"]
        summary = summarized_chapter["summary"]
        transcript = summarized_chapter["text"]

        # Whole seconds only, so the offset is never rendered as e.g. "12.5s".
        start_seconds = int(summarized_chapter["start"])

        # Display the hyperlink with timestamp and title
        hyperlink = (
            f"[{timestamp} - {title}]({youtube_url}{separator}t={start_seconds}s)"
        )
        # A markdown link needs no raw HTML, so unsafe_allow_html is left off:
        # chapter titles come from outside the app and must not inject markup.
        st.markdown(hyperlink)

        st.write(f'Summary: {summary}')
        # Use an expander for the transcript
        with st.expander("Show Transcript"):
            st.write(transcript)


def app():
    """Render the Video Summarizer page and start a summary on request.

    The OpenAI API key is taken from the ``OPENAI_API_KEY`` environment
    variable; when that is unset or empty, a masked input field is shown
    instead. Pressing "Summarize" runs ``summarize_video`` on the entered
    URL, provided both a key and a URL are present.
    """
    st.title("Video Summarizer")

    # An empty environment variable is treated the same as an unset one.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        # Mask the field so the key is not displayed in clear text.
        api_key = st.text_input("OPENAI_API_KEY", type="password")
    openai.api_key = api_key

    youtube_url = st.text_input("Enter a YouTube URL")

    # Add summarize button
    summarize_button = st.button("Summarize")

    if not summarize_button:
        return

    # Validate up front rather than failing midway through the pipeline.
    if not openai.api_key:
        st.warning("Please provide an OpenAI API key.")
        return
    cleaned_url = youtube_url.strip()
    if not cleaned_url:
        st.warning("Please enter a YouTube URL.")
        return

    summarize_video(cleaned_url)


# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()