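"""Streamlit PDF Query Bot.

Upload one or more PDFs, split their text into chunks, embed the chunks into a
local FAISS index, and chat with the documents through a LangChain "stuff"
question-answering chain backed by either OpenAI or Google Gemini.

Launch with Streamlit (assuming the script is saved as app.py, a hypothetical name):
    streamlit run app.py
"""
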
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import streamlit as st
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
import google.generativeai as genai

import os
import shutil


def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page from the uploaded PDFs."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages
            text += page.extract_text() or ""
    return text

def get_text_chunks(text, method):
    """Split the extracted text into overlapping chunks for embedding."""
    if method == 'Google-Gemini':
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
    else:
        text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=300, length_function=len)
    chunks = text_splitter.split_text(text)
    return chunks

def get_vector_store(text_chunks, method):
    """Embed the text chunks and persist them in a local FAISS index."""
    try:
        if method == 'Google-Gemini':
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        else:
            embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local("faiss_index")
    except Exception:
        st.warning("Invalid API key. Please provide a valid API key.")


def get_conversational_chain(method):
    """Build a 'stuff' question-answering chain with a grounded prompt for the selected model."""
    prompt_template = """
    Answer the question in as much detail as possible from the provided context and include all relevant details.
    If the answer is not in the provided context, just say "answer is not available in the context"; do not provide a wrong answer.

    Context:\n{context}\n
    Question:\n{question}\n

    Answer:
    """
    if method == 'Google-Gemini':
        model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    else:
        model = OpenAI()
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain



def user_input(user_question, method):
    """Retrieve the most relevant chunks from the saved FAISS index and answer the question."""
    if method == 'Google-Gemini':
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    else:
        embeddings = OpenAIEmbeddings()

    new_db = FAISS.load_local("faiss_index", embeddings)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain(method)
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True)
    return response




def delete_directory(directory_path):
    """Delete the given directory tree (used to clear a stale FAISS index)."""
    try:
        shutil.rmtree(directory_path)
        print(f"Directory '{directory_path}' successfully deleted.")
    except Exception as e:
        print(f"Error deleting directory '{directory_path}': {e}")

def return_response(query, document_search, chain):
    """Answer a query against an in-memory vector store (kept for reference, not used in the app)."""
    docs = document_search.similarity_search(query)
    result = chain.run(input_documents=docs, question=query)
    return result




st.set_page_config(page_title="Query any Pdf", page_icon="📄")

st.title("📄 PDF Query Bot 📄")
st.write("Made with ❤️ by Mainak")
with st.sidebar:
    pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit Button", accept_multiple_files=True, type=['pdf'])
    option = st.selectbox('Select a Model (choose OpenAI for best results)', ('OpenAI', 'Google-Gemini'))
    if option == 'OpenAI':
        api_key = st.text_input("Enter Your OpenAI API Key", type="password")
        os.environ["OPENAI_API_KEY"] = api_key
    else:
        api_key = st.text_input("Enter Your Google-Gemini API Key", type="password")
        os.environ["GOOGLE_API_KEY"] = api_key
        genai.configure(api_key=api_key)
# With no PDFs uploaded, clear any stale FAISS index left over from a previous session
if not pdf_docs:
    try:
        delete_directory('faiss_index')
    except Exception:
        pass
with st.sidebar:
    if st.button('Submit'):
        if api_key:
            if pdf_docs:
                # Read text from the uploaded files, chunk it, and build the FAISS index
                with st.spinner('Wait for it...'):
                    raw_text = get_pdf_text(pdf_docs)
                    chunks = get_text_chunks(raw_text, option)
                    get_vector_store(chunks, option)
            else:
                st.warning("Please upload your PDF file(s)")
        else:
            st.warning("Please enter your API key")

if os.path.exists("faiss_index"):
    if api_key:
        if pdf_docs:
            if "messages" not in st.session_state:
                st.session_state.messages = []

            # Display chat messages from history on app rerun
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

            if prompt := st.chat_input("What is up?"):
                # Display user message in chat message container
                with st.chat_message("user"):
                    st.markdown(prompt)
                # Add user message to chat history
                st.session_state.messages.append({"role": "user", "content": prompt})

                with st.spinner('Typing...'):
                    answer = user_input(str(prompt), option)["output_text"]
                response = f"PDF Mate: {answer}"
                # Display assistant response in chat message container
                with st.chat_message("assistant"):
                    st.markdown(response)
                # Add assistant response to chat history
                st.session_state.messages.append({"role": "assistant", "content": response})
            else:
                # No question yet: show a greeting without adding it to the chat history
                with st.chat_message("assistant"):
                    st.markdown("PDF Mate: Ask me anything about the pdf")
        else:
            st.warning("Please upload your PDF file(s)")
    else:
        st.warning("Please enter your API key")