import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

from langchain.agents import initialize_agent, AgentType
from langchain.callbacks import StreamlitCallbackHandler
from langchain.chat_models import ChatOpenAI

from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

import os
from dotenv import load_dotenv

load_dotenv()


# Collect the user's OpenAI API key in the sidebar. The PDF itself is opened
# inside split_chunk_text(), so no reader needs to be created at module level
# (Streamlit re-executes this script top to bottom on every interaction).
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", type="password")
    # Explicit call instead of a bare-string expression, so the link renders
    # even when Streamlit's "magic" feature is disabled in the app config.
    st.markdown("[Get an OpenAI API key](https://platform.openai.com/account/api-keys)")

@st.cache_data
def split_chunk_text(input_path="input_data/nvidia_10k.pdf"):
    """Read a PDF and split its extracted text into overlapping chunks.

    The function is wrapped in ``st.cache_data``, so the PDF does not have to
    be re-parsed on every Streamlit rerun.

    Args:
        input_path: Path of the PDF file to read.

    Returns:
        The chunks returned by ``CharacterTextSplitter.split_text`` for the
        concatenated page text, configured with a newline separator, a chunk
        size of 800 and an overlap of 200 (both measured with ``len``).
    """
    reader = PdfReader(input_path)

    # Pages for which extract_text() returns nothing are skipped; the
    # remaining page texts are joined in document order without a separator.
    page_texts = (page.extract_text() for page in reader.pages)
    raw_text = "".join(page_text for page_text in page_texts if page_text)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)
    

@st.cache_resource(show_spinner=False)
def _build_retriever(api_key):
    """Embed the PDF chunks and return a FAISS-backed retriever.

    Wrapped in ``st.cache_resource`` so the index is built once per API key
    instead of being re-embedded on every script rerun.

    Args:
        api_key: OpenAI API key used for the embedding requests.

    Returns:
        The retriever obtained from ``FAISS.from_texts(...).as_retriever()``.
    """
    embeddings = OpenAIEmbeddings(
        model="text-embedding-3-small",
        openai_api_key=api_key,
    )
    vector_store = FAISS.from_texts(split_chunk_text(), embeddings)
    return vector_store.as_retriever()


# Question form. Nothing that talks to OpenAI is constructed until a key has
# been entered and the form has actually been submitted.
with st.form("my_form"):
    text = st.text_area("Enter question:", " ")
    submitted = st.form_submit_button("Submit")

    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
    elif submitted and not text.strip():
        # The text area defaults to a single space; do not spend an LLM call
        # on a blank question.
        st.warning("Please enter a question.")
    elif submitted:
        from operator import itemgetter

        from langchain.prompts import ChatPromptTemplate

        retriever = _build_retriever(openai_api_key)

        template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

        Context:
        {context}

        Question:
        {question}
        """

        prompt = ChatPromptTemplate.from_template(template)

        # Use the key typed into the sidebar rather than relying on whatever
        # happens to be set in the environment.
        primary_qa_llm = ChatOpenAI(
            model_name="gpt-3.5-turbo",
            temperature=0,
            openai_api_key=openai_api_key,
        )

        retrieval_augmented_qa_chain = (
            # Invoke with {"question": "<user question>"}.
            # "question": passed through unchanged.
            # "context" : the question piped into the retriever.
            {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
            # Re-assigns "context" to itself; kept so the chain has the same
            # steps as before.
            | RunnablePassthrough.assign(context=itemgetter("context"))
            # "response": prompt formatted with context + question, piped
            #             into the chat model.
            # "context" : carried through from the previous step.
            | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
        )

        result = retrieval_augmented_qa_chain.invoke({"question": text})

        st.info(result["response"].content)