File size: 2,627 Bytes
1b60a9a 574fc32 1b60a9a 574fc32 1b60a9a 111a9a3 1b60a9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# Import the necessary packages
import os
import textwrap
import uuid

import streamlit as st
from langchain.chains import LLMChain
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# Prompt wording: the model is told to answer only from the transcript text
# it is given and to say "I don't know" when that text is not enough.
template = """You can provide answers about YouTube videos using their transcripts.
For the question: {question}
Please refer to the video transcript: {docs_page_content}
Rely solely on the transcript's factual data to respond.
If the information isn't sufficient, simply state "I don't know".
Ensure your answers are comprehensive and in-depth.
"""

# OpenAI clients shared by the rest of the script: an embedding model and a
# completion model created with temperature=0.
embeddings = OpenAIEmbeddings()
llm = OpenAI(temperature=0)

# Declare the template's two placeholders, then bundle prompt and model into a
# single chain that is invoked with `question` and `docs_page_content`.
prompt = PromptTemplate(
    template=template,
    input_variables=["question", "docs_page_content"],
)
chain = LLMChain(prompt=prompt, llm=llm)
# Streamlit front end: collect a YouTube URL and a question, then answer the
# question from the most relevant parts of the video's transcript.
st.title("YouTube Video Transcript Analyzer")

video_url = st.text_input("Enter the YouTube video URL:")
question = st.text_input("Enter your question about the video:")

# Nothing is fetched or sent to the model until both fields are filled in.
if video_url and question:
    # An unparseable URL makes the loader raise ValueError; show that as a
    # message in the page instead of a traceback.
    try:
        loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
    except ValueError as err:
        st.error(f"Could not determine the video ID for that URL: {err}")
        st.stop()

    # Load the transcript and split it into 1500-character chunks that
    # overlap by 150 characters.
    transcript = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    chunks = text_splitter.split_documents(transcript)
    if not chunks:
        st.error("No transcript text is available for this video.")
        st.stop()

    # With add_video_info=True the loader stores the video details in the
    # document metadata, so read title/author from there rather than calling
    # the loader's private _get_video_info() a second time.
    video_info = transcript[0].metadata
    st.write("**Title:**", video_info.get("title", "Unknown"))
    st.write("**Author:**", video_info.get("author", "Unknown"))

    # Index the chunks in a throwaway collection with a unique name. Reusing
    # the default collection in a persisted directory would keep appending
    # chunks on every rerun, so the search could return text from previously
    # analysed videos or duplicated chunks.
    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name=f"yt_{uuid.uuid4().hex}",
    )
    try:
        # Keep the three chunks most similar to the question.
        docs = vectordb.similarity_search(query=question, k=3)
    finally:
        # The collection is single-use; drop it so reruns do not pile up.
        vectordb.delete_collection()

    docs_page_content = " ".join(doc.page_content for doc in docs)

    # Ask the model, passing the question together with the selected chunks.
    response = chain.run(question=question, docs_page_content=docs_page_content)
    st.write(textwrap.fill(response, width=85))
|