ammansik committed on
Commit
dd1ba34
·
1 Parent(s): e3d3533

refactoring

Browse files
Files changed (2) hide show
  1. app.py +13 -6
  2. text_summary.py +15 -13
app.py CHANGED
@@ -5,6 +5,7 @@ import time
5
  from functools import wraps
6
  from shutil import rmtree
7
 
 
8
  import streamlit as st
9
 
10
  from audio_to_text import transcribe_audio
@@ -45,19 +46,19 @@ def audio_to_text(audio_fpath):
45
 
46
 
47
  @timing_decorator("Retrieving chapters")
48
- def retrieve_chapters(timestamped_text, yt_chapters):
49
  # Get chapters
50
  if len(yt_chapters) == 0:
51
- chapters = get_automatic_chapters(timestamped_text)
52
  else:
53
  chapters = align_chapters(timestamped_text, yt_chapters)
54
  return chapters
55
 
56
 
57
  @timing_decorator("Summarizing video")
58
- def summarize_youtube_chapters(chapters):
59
  # Summarize chapters
60
- summarized_chapters = summarize_chapters(chapters)
61
  return summarized_chapters
62
 
63
 
@@ -84,8 +85,10 @@ def summarize_video(youtube_url):
84
  audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
85
  timestamped_text = audio_to_text(audio_fpath)
86
 
87
- chapters = retrieve_chapters(timestamped_text, yt_chapters)
88
- summarized_chapters, overall_summary = summarize_youtube_chapters(chapters)
 
 
89
 
90
  st.write(f"**TLDR:** {overall_summary}")
91
 
@@ -109,6 +112,10 @@ def summarize_video(youtube_url):
109
 
110
  def app():
111
  st.title("Video Summarizer")
 
 
 
 
112
  youtube_url = st.text_input("Enter a YouTube URL")
113
 
114
  # Add summarize button
 
5
  from functools import wraps
6
  from shutil import rmtree
7
 
8
+ import openai
9
  import streamlit as st
10
 
11
  from audio_to_text import transcribe_audio
 
46
 
47
 
48
  @timing_decorator("Retrieving chapters")
49
+ def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
50
  # Get chapters
51
  if len(yt_chapters) == 0:
52
+ chapters = get_automatic_chapters(timestamped_text, openai_api_key)
53
  else:
54
  chapters = align_chapters(timestamped_text, yt_chapters)
55
  return chapters
56
 
57
 
58
  @timing_decorator("Summarizing video")
59
+ def summarize_youtube_chapters(chapters, openai_api_key):
60
  # Summarize chapters
61
+ summarized_chapters = summarize_chapters(chapters, openai_api_key)
62
  return summarized_chapters
63
 
64
 
 
85
  audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
86
  timestamped_text = audio_to_text(audio_fpath)
87
 
88
+ chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
89
+ summarized_chapters, overall_summary = summarize_youtube_chapters(
90
+ chapters, openai.api_key
91
+ )
92
 
93
  st.write(f"**TLDR:** {overall_summary}")
94
 
 
112
 
113
  def app():
114
  st.title("Video Summarizer")
115
+ openai.api_key = os.environ.get("OPENAI_API_KEYS")
116
+ if openai.api_key is None:
117
+ openai.api_key = st.text_input("OPENAI_API_KEY")
118
+
119
  youtube_url = st.text_input("Enter a YouTube URL")
120
 
121
  # Add summarize button
text_summary.py CHANGED
@@ -19,15 +19,11 @@ CHAPTER_TITLE = "Give a title to this video chapter based on the transcript: "
19
  title_template = "Give a title to this text summary: {text}"
20
  TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
21
 
22
- openai.api_key = os.environ.get("CHATGPT_API_KEY")
23
-
24
- if openai.api_key is None:
25
- raise ValueError("CHATGPT_API_KEY environment variable not set")
26
-
27
-
28
  @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
29
- def get_embeddings(text_chunks, model="text-embedding-ada-002"):
30
- data = openai.Embedding.create(input=text_chunks, model=model)["data"]
 
 
31
  embeddings = [item["embedding"] for item in data]
32
  return np.array(embeddings)
33
 
@@ -98,12 +94,18 @@ def align_chapters(timestamped_transcript, yt_chapters):
98
  return chapters
99
 
100
 
101
- def get_automatic_chapters(timestamped_transcript, chunk_lines=5, num_clusters=3):
102
- timestamped_transcripts = timestamped_transcript.split("\n")
 
 
 
 
 
 
103
 
104
  # Split into chunks
105
  text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
106
- embeddings = get_embeddings(text_chunks)
107
 
108
  # Creating and fitting the K-means model
109
  kmeans = KMeans(n_clusters=num_clusters)
@@ -161,8 +163,8 @@ def get_chunk_text(chunk):
161
  return chunk_text
162
 
163
 
164
- def summarize_chapters(chapters):
165
- llm = OpenAI(temperature=0.9, openai_api_key=os.environ.get("CHATGPT_API_KEY"))
166
  chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
167
 
168
  summary_chain = load_summarize_chain(
 
19
  title_template = "Give a title to this text summary: {text}"
20
  TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
21
 
 
 
 
 
 
 
22
  @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
23
+ def get_embeddings(text_chunks, openai_api_key, model="text-embedding-ada-002"):
24
+ data = openai.Embedding.create(
25
+ input=text_chunks, model=model, openai_api_key=openai_api_key
26
+ )["data"]
27
  embeddings = [item["embedding"] for item in data]
28
  return np.array(embeddings)
29
 
 
94
  return chapters
95
 
96
 
97
+ def get_automatic_chapters(
98
+ timestamped_transcript, openai_api_key, chunk_lines=5, num_clusters=3
99
+ ):
100
+ timestamped_transcripts = [
101
+ timestamped_line
102
+ for timestamped_line in timestamped_transcript.split("\n")
103
+ if len(timestamped_line.strip()) > 0
104
+ ]
105
 
106
  # Split into chunks
107
  text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
108
+ embeddings = get_embeddings(text_chunks, openai_api_key)
109
 
110
  # Creating and fitting the K-means model
111
  kmeans = KMeans(n_clusters=num_clusters)
 
163
  return chunk_text
164
 
165
 
166
+ def summarize_chapters(chapters, openai_api_key):
167
+ llm = OpenAI(temperature=0.9, openai_api_key=openai_api_key)
168
  chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
169
 
170
  summary_chain = load_summarize_chain(