File size: 2,062 Bytes
62d725c
 
d66126a
 
62d725c
d66126a
62d725c
 
d66126a
 
62d725c
d66126a
1af48ba
656b3bd
067fc57
 
d66126a
62d725c
 
d66126a
 
 
 
 
 
da1a17c
d66126a
 
 
 
 
62d725c
d66126a
1af48ba
62d725c
d66126a
 
62d725c
d66126a
 
62d725c
 
d66126a
 
62d725c
d66126a
 
 
 
 
 
 
62d725c
d66126a
 
 
 
 
 
62d725c
d66126a
 
 
 
62d725c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
import os

# Set up the directories for data and vector DB
# NOTE(review): DATA_DIR and DB_DIR point at the same folder, so Chroma's
# persistence files are written alongside the source PDFs — confirm this is
# intentional (a separate DB directory is the more common layout).
DATA_DIR = "MyData"  # directory scanned for PDFs by load_data()
DB_DIR = "MyData"  # persist_directory handed to Chroma.from_documents()


# Initialize the embeddings model
# all-MiniLM-L6-v2 is a compact sentence-transformers model; instantiated once
# at module level and reused by load_data() below.
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Load and process PDF documents.
# FIX: Streamlit re-executes the whole script on every widget interaction, so
# without caching every submitted question re-loaded and re-embedded all PDFs.
# @st.cache_resource makes the expensive index build run once per session/server.
@st.cache_resource(show_spinner="Indexing PDF documents...")
def load_data():
    """Load every PDF in DATA_DIR, split into overlapping chunks, and index
    the chunks in a persistent Chroma vector store.

    Returns:
        Chroma: vector store persisted under DB_DIR, ready to be used as a
        retriever.
    """
    loader = PyPDFDirectoryLoader(DATA_DIR)
    data_on_pdf = loader.load()
    # 2000-char chunks with 200-char overlap; separators are tried in order
    # (paragraph, line, sentence, word) so chunks stay semantically coherent.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=2000,
        chunk_overlap=200
    )
    splits = text_splitter.split_documents(data_on_pdf)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=DB_DIR)
    return vectorstore

# Set up the generative AI model.
# SECURITY FIX: the Google API key was hard-coded in source (a committed,
# leaked credential). Read it from the environment instead and fail fast
# with a clear message if it is missing. Revoke the old key.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)

# Build (or load) the vector store once at startup.
vectorstore = load_data()

# Streamlit interface
st.title("RAG App: Question-Answering with PDFs")


# User input for question
question = st.text_input("Ask a question about the documents:")

if st.button("Submit"):
    # ROBUSTNESS FIX: strip the input so a whitespace-only entry is treated
    # as empty instead of being sent to the model.
    query = question.strip() if question else ""
    if query:
        retriever = vectorstore.as_retriever()
        # Community RAG prompt: stuffs retrieved context + the question into
        # a single answer-generation prompt.
        prompt = hub.pull("rlm/rag-prompt")

        def format_docs(docs):
            """Join the retrieved Documents' text into one context string."""
            return "\n\n".join(doc.page_content for doc in docs)

        # Retrieval-augmented chain: fetch docs -> format as context ->
        # fill prompt -> call Gemini -> parse the reply to plain text.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        response = rag_chain.invoke(query)
        st.markdown(response)
    else:
        st.warning("Please enter a question.")