"""Streamlit app: upload a PDF, index it in FAISS, and ask Gemini questions about it."""
import streamlit as st
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
import fitz  # PyMuPDF, used for PDF text extraction
from dotenv import load_dotenv
import os
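# Assumed dependencies (PyPI names): streamlit, google-generativeai, langchain,
# langchain-community, langchain-google-genai, faiss-cpu, PyMuPDF, python-dotenv.
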
load_dotenv()

# Authenticate the Gemini client with the key loaded from .env (GOOGLE_API_KEY).
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
st.title('Document Q&A')
st.write("Please upload a PDF file and create a vector store before asking questions.")

def get_chain():
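    """Build a 'stuff' QA chain that answers only from the retrieved context."""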
    model = ChatGoogleGenerativeAI(model='gemini-2.0-flash', temperature=0.1)
    prompt_template = """
    Answer the question as thoroughly as possible using only the provided context.
    If the answer is not in the context, reply exactly "answer is not available in context";
    do not make up an answer.

    Context: {context}
    Question: {question}
    Answer:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain


def get_pdf_content(pdffile):
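    """Extract plain text from every page of the uploaded PDF."""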
    with fitz.open(stream=pdffile.read(), filetype="pdf") as doc:
        text = ""
        for page in doc:
            text += page.get_text()
    return text


def create_database(data):
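    """Chunk the text, embed each chunk, and persist a FAISS index to ./faiss_index."""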
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # 1,000-character chunks with 200 characters of overlap preserve context
    # across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    final_document = text_splitter.split_text(data)
    vectors = FAISS.from_texts(final_document, embeddings)
    vectors.save_local("faiss_index")


def user_input(u_question):
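    """Answer a question using chunks retrieved from the saved FAISS index."""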
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
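    # allow_dangerous_deserialization is acceptable here because the index was
    # written locally by create_database(), not downloaded from an untrusted source.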
    new_db = FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(u_question)  # top matching chunks (k defaults to 4)
    chain = get_chain()
    # Calling the chain directly is deprecated; invoke() is the current entry point.
    response = chain.invoke(
        {"input_documents": docs, "question": u_question}, return_only_outputs=True
    )
    return response["output_text"]


# Sidebar: upload a PDF and build the FAISS index once before chatting.
with st.sidebar:
    uploaded_file = st.file_uploader("Upload PDF file", key="pdf_uploader")
    if st.button('Create vector store'):
        if uploaded_file is not None:
            data = get_pdf_content(uploaded_file)
            create_database(data)
            st.write("Vector store created")
        else:
            st.write("Please upload a PDF file")

if "messages" not in st.session_state:
    st.session_state.messages = []

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])


# Handle a new question: show it, answer from the vector store, and record both turns.
if prompt := st.chat_input("Ask questions"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        with st.spinner(text="Fetching details..."):
            response = user_input(prompt)
        message_placeholder.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})