# Import the necessary packages
import os
import textwrap
import uuid

import streamlit as st
from langchain.chains import LLMChain
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


# Prompt text: the model receives the user's question together with the
# retrieved transcript excerpts and is told to answer from the transcript only.
template = """You can provide answers about YouTube videos using their transcripts.

    For the question: {question}
    Please refer to the video transcript: {docs_page_content}

    Rely solely on the transcript's factual data to respond.

    If the information isn't sufficient, simply state "I don't know".

    Ensure your answers are comprehensive and in-depth.
    """
prompt = PromptTemplate(
    template=template,
    input_variables=["question", "docs_page_content"],
)

# OpenAI clients: one to embed text, one (temperature=0) to generate answers
embeddings = OpenAIEmbeddings()
llm = OpenAI(temperature=0)

# Chain that fills the prompt and sends it to the LLM
chain = LLMChain(prompt=prompt, llm=llm)


# Setup streamlit
st.title("YouTube Video Transcript Analyzer")
# *** YOUR VIDEO URL and QUESTION ***
video_url = st.text_input("Enter the YouTube video URL:")
question = st.text_input("Enter your question about the video:")

# Streamlit re-executes this script on every interaction, so the pipeline below
# runs once each time both inputs are filled in.
if video_url and question:
    # Load the transcript; add_video_info=True also puts the video's title and
    # author into each document's metadata, so no separate lookup is needed.
    try:
        loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
        transcript = loader.load()
    except Exception as exc:  # UI boundary: report the failure to the user
        st.error(f"Could not load the transcript for this video: {exc}")
        transcript = None

    if transcript is not None:
        # Split the transcript into chunks with 1500 characters and 150 characters overlap
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1500, chunk_overlap=150
        )
        chunks = text_splitter.split_documents(transcript)

        if not chunks:
            st.warning("No transcript text is available for this video.")
        else:
            # show the video title and author
            video_info = transcript[0].metadata
            st.write("**Title:**", video_info.get("title", "Unknown"))
            st.write("**Author:**", video_info.get("author", "Unknown"))

            # Build a throwaway vector index for this run only. A shared,
            # persisted collection would accumulate chunks from every rerun and
            # every video, so searches could return duplicated excerpts or
            # excerpts from a different video.
            vectordb = Chroma.from_documents(
                documents=chunks,
                embedding=embeddings,
                collection_name=f"yt-{uuid.uuid4().hex}",
            )
            try:
                # Search for the transcript chunks most similar to the question
                matches = vectordb.similarity_search(query=question, k=3)
            finally:
                # Drop the temporary collection so reruns do not pile up
                vectordb.delete_collection()

            # Concatenate the top 3 chunks and send them with the question to the chain
            docs_page_content = " ".join(doc.page_content for doc in matches)
            response = chain.run(
                question=question, docs_page_content=docs_page_content
            )
            # st.write renders Markdown and the browser wraps lines itself;
            # textwrap.fill would collapse the answer's paragraph breaks.
            st.write(response.strip())