Spaces:

ammansik
/

youtube_summarizer

Sleeping

App Files Community

ammansik committed on Jun 6, 2023

Commit

dd1ba34

1 Parent(s): e3d3533

refactoring

Browse files

Files changed (2) hide show

app.py +13 -6
text_summary.py +15 -13

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 from functools import wraps
 from shutil import rmtree
 import streamlit as st
 from audio_to_text import transcribe_audio
@@ -45,19 +46,19 @@ def audio_to_text(audio_fpath):
 @timing_decorator("Retrieving chapters")
-def retrieve_chapters(timestamped_text, yt_chapters):
     # Get chapters
     if len(yt_chapters) == 0:
-        chapters = get_automatic_chapters(timestamped_text)
     else:
         chapters = align_chapters(timestamped_text, yt_chapters)
     return chapters
 @timing_decorator("Summarizing video")
-def summarize_youtube_chapters(chapters):
     # Summarize chapters
-    summarized_chapters = summarize_chapters(chapters)
     return summarized_chapters
@@ -84,8 +85,10 @@ def summarize_video(youtube_url):
     audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
     timestamped_text = audio_to_text(audio_fpath)
-    chapters = retrieve_chapters(timestamped_text, yt_chapters)
-    summarized_chapters, overall_summary = summarize_youtube_chapters(chapters)
     st.write(f"**TLDR:** {overall_summary}")
@@ -109,6 +112,10 @@ def summarize_video(youtube_url):
 def app():
     st.title("Video Summarizer")
     youtube_url = st.text_input("Enter a YouTube URL")
     # Add summarize button

 from functools import wraps
 from shutil import rmtree
+import openai
 import streamlit as st
 from audio_to_text import transcribe_audio
 @timing_decorator("Retrieving chapters")
+def retrieve_chapters(timestamped_text, yt_chapters, openai_api_key):
     # Get chapters
     if len(yt_chapters) == 0:
+        chapters = get_automatic_chapters(timestamped_text, openai_api_key)
     else:
         chapters = align_chapters(timestamped_text, yt_chapters)
     return chapters
 @timing_decorator("Summarizing video")
+def summarize_youtube_chapters(chapters, openai_api_key):
     # Summarize chapters
+    summarized_chapters = summarize_chapters(chapters, openai_api_key)
     return summarized_chapters
     audio_fpath, yt_chapters = download_youtube(youtube_url, work_dir)
     timestamped_text = audio_to_text(audio_fpath)
+    chapters = retrieve_chapters(timestamped_text, yt_chapters, openai.api_key)
+    summarized_chapters, overall_summary = summarize_youtube_chapters(
+        chapters, openai.api_key
+    )
     st.write(f"**TLDR:** {overall_summary}")
 def app():
     st.title("Video Summarizer")
+    openai.api_key = os.environ.get("OPENAI_API_KEYS")
+    if openai.api_key is None:
+        openai.api_key = st.text_input("OPENAI_API_KEY")
     youtube_url = st.text_input("Enter a YouTube URL")
     # Add summarize button

text_summary.py CHANGED Viewed

@@ -19,15 +19,11 @@ CHAPTER_TITLE = "Give a title to this video chapter based on the transcript: "
 title_template = "Give a title to this text summary: {text}"
 TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
-openai.api_key = os.environ.get("CHATGPT_API_KEY")
-if openai.api_key is None:
-    raise ValueError("CHATGPT_API_KEY environment variable not set")
 @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def get_embeddings(text_chunks, model="text-embedding-ada-002"):
-    data = openai.Embedding.create(input=text_chunks, model=model)["data"]
     embeddings = [item["embedding"] for item in data]
     return np.array(embeddings)
@@ -98,12 +94,18 @@ def align_chapters(timestamped_transcript, yt_chapters):
     return chapters
-def get_automatic_chapters(timestamped_transcript, chunk_lines=5, num_clusters=3):
-    timestamped_transcripts = timestamped_transcript.split("\n")
     # Split into chunks
     text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
-    embeddings = get_embeddings(text_chunks)
     # Creating and fitting the K-means model
     kmeans = KMeans(n_clusters=num_clusters)
@@ -161,8 +163,8 @@ def get_chunk_text(chunk):
     return chunk_text
-def summarize_chapters(chapters):
-    llm = OpenAI(temperature=0.9, openai_api_key=os.environ.get("CHATGPT_API_KEY"))
     chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
     summary_chain = load_summarize_chain(

 title_template = "Give a title to this text summary: {text}"
 TITLE_PROMPT = PromptTemplate(template=title_template, input_variables=["text"])
 @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def get_embeddings(text_chunks, openai_api_key, model="text-embedding-ada-002"):
+    data = openai.Embedding.create(
+        input=text_chunks, model=model, openai_api_key=openai_api_key
+    )["data"]
     embeddings = [item["embedding"] for item in data]
     return np.array(embeddings)
     return chapters
+def get_automatic_chapters(
+    timestamped_transcript, openai_api_key, chunk_lines=5, num_clusters=3
+):
+    timestamped_transcripts = [
+        timestamped_line
+        for timestamped_line in timestamped_transcript.split("\n")
+        if len(timestamped_line.strip()) > 0
+    ]
     # Split into chunks
     text_chunks = get_chunks(timestamped_transcripts, chunk_lines)
+    embeddings = get_embeddings(text_chunks, openai_api_key)
     # Creating and fitting the K-means model
     kmeans = KMeans(n_clusters=num_clusters)
     return chunk_text
+def summarize_chapters(chapters, openai_api_key):
+    llm = OpenAI(temperature=0.9, openai_api_key=openai_api_key)
     chapter_docs = [Document(page_content=chapter["text"]) for chapter in chapters]
     summary_chain = load_summarize_chain(