Spaces:
Sleeping
Sleeping
refactoring
Browse files
- app.py +13 -6
- text_summary.py +15 -13
app.py
CHANGED
@@ -5,6 +5,7 @@ import time
|
|
5 |
from functools import wraps
|
6 |
from shutil import rmtree
|
7 |
|
|
|
8 |
import streamlit as st
|
9 |
|
10 |
from audio_to_text import transcribe_audio
|
@@ -45,19 +46,19 @@ def audio_to_text(audio_fpath):
|
|
45 |
|
46 |
|
47 |
@timing_decorator("Retrieving chapters")
|
48 |
-
def retrieve_chapters(timestamped_text, yt_chapters):
|
49 |
# Get chapters
|
50 |
if len(yt_chapters) == 0:
|
51 |
-
chapters = get_automatic_chapters(timestamped_text)
|
52 |
else:
|
53 |
chapters = align_chapters(timestamped_text, yt_chapters)
|
54 |
return chapters
|
55 |
|
56 |
|
57 |
@timing_decorator("Summarizing video")
|
58 |
-
def summarize_youtube_chapters(chapters):
|
59 |
# Summarize chapters
|
60 |
-
summarized_chapters = summarize_chapters(chapters)
|
61 |
return summarized_chapters
|
62 |
|
63 |
|
@@ -84,8 +85,10 @@ def summarize_video(youtube_url):
|
|
84 |
audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
|
85 |
timestamped_text = audio_to_text(audio_fpath)
|
86 |
|
87 |
-
chapters = retrieve_chapters(timestamped_text, yt_chapters)
|
88 |
-
summarized_chapters, overall_summary = summarize_youtube_chapters(
|
|
|
|
|
89 |
|
90 |
st.write(f"**TLDR:** {overall_summary}")
|
91 |
|
@@ -109,6 +112,10 @@ def summarize_video(youtube_url):
|
|
109 |
|
110 |
def app():
|
111 |
st.title("Video Summarizer")
|
|
|
|
|
|
|
|
|
112 |
youtube_url = st.text_input("Enter a YouTube URL")
|
113 |
|
114 |
# Add summarize button
|
|
|
5 |
from functools import wraps
|
6 |
from shutil import rmtree
|
7 |
|
8 |
+
import openai
|
9 |
import streamlit as st
|
10 |
|
11 |
from audio_to_text import transcribe_audio
|
|
|
46 |
|
47 |
|
48 |
@timing_decorator("Retrieving chapters")
|
49 |
+
def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
|
50 |
# Get chapters
|
51 |
if len(yt_chapters) == 0:
|
52 |
+
chapters = get_automatic_chapters(timestamped_text, openai_api_key)
|
53 |
else:
|
54 |
chapters = align_chapters(timestamped_text, yt_chapters)
|
55 |
return chapters
|
56 |
|
57 |
|
58 |
@timing_decorator("Summarizing video")
|
59 |
+
def summarize_youtube_chapters(chapters, openai_api_key):
|
60 |
# Summarize chapters
|
61 |
+
summarized_chapters = summarize_chapters(chapters, openai_api_key)
|
62 |
return summarized_chapters
|
63 |
|
64 |
|
|
|
85 |
audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
|
86 |
timestamped_text = audio_to_text(audio_fpath)
|
87 |
|
88 |
+
chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
|
89 |
+
summarized_chapters, overall_summary = summarize_youtube_chapters(
|
90 |
+
chapters, openai.api_key
|
91 |
+
)
|
92 |
|
93 |
st.write(f"**TLDR:** {overall_summary}")
|
94 |
|
|
|
112 |
|
113 |
def app():
|
114 |
st.title("Video Summarizer")
|
115 |
+
openai.api_key = os.environ.get("OPENAI_API_KEYS")
|
116 |
+
if openai.api_key is None:
|
117 |
+
openai.api_key = st.text_input("OPENAI_API_KEY")
|
118 |
+
|
119 |
youtube_url = st.text_input("Enter a YouTube URL")
|
120 |
|
121 |
# Add summarize button
|
text_summary.py
CHANGED
@@ -19,15 +19,11 @@ CHAPTER_TITLE = "Give a title to this video chapter based on the transcript: "
|
|
19 |
title_template = "Give a title to this text summary: {text}"
|
20 |
TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
|
21 |
|
22 |
-
openai.api_key = os.environ.get("CHATGPT_API_KEY")
|
23 |
-
|
24 |
-
if openai.api_key is None:
|
25 |
-
raise ValueError("CHATGPT_API_KEY environment variable not set")
|
26 |
-
|
27 |
-
|
28 |
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
29 |
-
def get_embeddings(text_chunks, model="text-embedding-ada-002"):
|
30 |
-
data = openai.Embedding.create(
|
|
|
|
|
31 |
embeddings = [item["embedding"] for item in data]
|
32 |
return np.array(embeddings)
|
33 |
|
@@ -98,12 +94,18 @@ def align_chapters(timestamped_transcript, yt_chapters):
|
|
98 |
return chapters
|
99 |
|
100 |
|
101 |
-
def get_automatic_chapters(
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
# Split into chunks
|
105 |
text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
|
106 |
-
embeddings = get_embeddings(text_chunks)
|
107 |
|
108 |
# Creating and fitting the K-means model
|
109 |
kmeans = KMeans(n_clusters=num_clusters)
|
@@ -161,8 +163,8 @@ def get_chunk_text(chunk):
|
|
161 |
return chunk_text
|
162 |
|
163 |
|
164 |
-
def summarize_chapters(chapters):
|
165 |
-
llm = OpenAI(temperature=0.9, openai_api_key=
|
166 |
chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
|
167 |
|
168 |
summary_chain = load_summarize_chain(
|
|
|
19 |
title_template = "Give a title to this text summary: {text}"
|
20 |
TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
23 |
+
def get_embeddings(text_chunks, openai_api_key, model="text-embedding-ada-002"):
|
24 |
+
data = openai.Embedding.create(
|
25 |
+
input=text_chunks, model=model, openai_api_key=openai_api_key
|
26 |
+
)["data"]
|
27 |
embeddings = [item["embedding"] for item in data]
|
28 |
return np.array(embeddings)
|
29 |
|
|
|
94 |
return chapters
|
95 |
|
96 |
|
97 |
+
def get_automatic_chapters(
|
98 |
+
timestamped_transcript, openai_api_key, chunk_lines=5, num_clusters=3
|
99 |
+
):
|
100 |
+
timestamped_transcripts = [
|
101 |
+
timestamped_line
|
102 |
+
for timestamped_line in timestamped_transcript.split("\n")
|
103 |
+
if len(timestamped_line.strip()) > 0
|
104 |
+
]
|
105 |
|
106 |
# Split into chunks
|
107 |
text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
|
108 |
+
embeddings = get_embeddings(text_chunks, openai_api_key)
|
109 |
|
110 |
# Creating and fitting the K-means model
|
111 |
kmeans = KMeans(n_clusters=num_clusters)
|
|
|
163 |
return chunk_text
|
164 |
|
165 |
|
166 |
+
def summarize_chapters(chapters, openai_api_key):
|
167 |
+
llm = OpenAI(temperature=0.9, openai_api_key=openai_api_key)
|
168 |
chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
|
169 |
|
170 |
summary_chain = load_summarize_chain(
|