Omid-sar commited on
Commit
1b60a9a
·
1 Parent(s): 9c17591

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the necessary packages
2
+ import os
3
+ from langchain.embeddings import OpenAIEmbeddings
4
+ from langchain.document_loaders import YoutubeLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.llms import OpenAI
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.chains import LLMChain
10
+ import textwrap
11
+ import streamlit as st
12
+ from apikey import apikey
13
+
14
# --- Configuration --------------------------------------------------------
# Expose the OpenAI key to the SDK and choose where Chroma persists vectors.
os.environ["OPENAI_API_KEY"] = apikey
persist_directory = "../../data/processed"

# Embedding model for the vector store, plus a deterministic completion LLM
# (temperature 0 keeps answers reproducible for the same transcript).
embeddings = OpenAIEmbeddings()
llm = OpenAI(temperature=0)

# Prompt contract: answer {question} using only {docs_page_content}.
template = """You can provide answers about YouTube videos using their transcripts.

For the question: {question}
Please refer to the video transcript: {docs_page_content}

Rely solely on the transcript's factual data to respond.

If the information isn't sufficient, simply state "I don't know".

Ensure your answers are comprehensive and in-depth.
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["question", "docs_page_content"],
)

# Couple the prompt with the LLM into a single runnable chain.
chain = LLMChain(llm=llm, prompt=prompt)
38
+
39
+
40
# --- Streamlit UI and retrieval/QA pipeline -------------------------------
st.title("YouTube Video Transcript Analyzer")

# User inputs: the video to analyze and the question to answer about it.
video_url = st.text_input("Enter the YouTube video URL:")
question = st.text_input("Enter your question about the video:")

# Run the pipeline only once both inputs have been provided.
if video_url and question:
    # Load the transcript once; add_video_info=True attaches title/author
    # metadata to the returned documents, so a second fetch is unnecessary.
    loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
    transcript = loader.load()

    if not transcript:
        # e.g. captions disabled for this video -- fail gracefully instead
        # of crashing on an empty document list downstream.
        st.error("Could not load a transcript for this video.")
    else:
        # Read video info from document metadata rather than the private
        # loader._get_video_info() helper, which is not a stable API.
        meta = transcript[0].metadata
        st.write("**Title:**", meta.get("title", "unknown"))
        st.write("**Author:**", meta.get("author", "unknown"))

        # Split the transcript into 1500-character chunks with 150 overlap
        # so each chunk fits comfortably in the prompt while keeping context.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1500, chunk_overlap=150
        )
        docs = text_splitter.split_documents(transcript)

        # Vector store used to search for chunks relevant to the question.
        vectordb = Chroma.from_documents(
            documents=docs, embedding=embeddings, persist_directory=persist_directory
        )

        # Retrieve the 3 most similar chunks and concatenate their text.
        docs = vectordb.similarity_search(query=question, k=3)
        docs_page_content = " ".join(doc.page_content for doc in docs)

        # Ask the LLM with the question + retrieved context, then render
        # the answer wrapped to a readable width.
        response = chain.run(question=question, docs_page_content=docs_page_content)
        st.write(textwrap.fill(response, width=85))