# NOTE(review): the following lines are residue scraped from a Hugging Face
# Space page (status, file size, commit hashes, editor column numbers).
# They are not Python and would raise a SyntaxError; kept here as comments.
# Spaces: Sleeping / Sleeping — File size: 2,062 Bytes
# commits: 62d725c d66126a 1af48ba 656b3bd 067fc57 da1a17c
import streamlit as st
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
import os
# Set up the directories for data and vector DB
DATA_DIR = "MyData"  # directory scanned for PDF files by PyPDFDirectoryLoader
# NOTE(review): DB_DIR is the SAME folder as DATA_DIR, so Chroma will persist
# its index files alongside the source PDFs — consider a separate directory.
DB_DIR = "MyData"
# Initialize the embeddings model
# all-MiniLM-L6-v2: small, fast SentenceTransformer; downloaded on first use.
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Load and process PDF documents
@st.cache_resource  # index once per process; otherwise every widget interaction re-embeds all PDFs
def load_data():
    """Load PDFs from DATA_DIR, split them into chunks, and build a Chroma store.

    Returns:
        Chroma: a vector store over the chunked documents, persisted to DB_DIR.
    """
    loader = PyPDFDirectoryLoader(DATA_DIR)
    data_on_pdf = loader.load()
    # Split on paragraph/sentence boundaries first; 200-char overlap keeps
    # context across chunk edges for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=2000,
        chunk_overlap=200
    )
    splits = text_splitter.split_documents(data_on_pdf)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=DB_DIR)
    return vectorstore
# Set up the generative AI model
# SECURITY: the API key was previously hard-coded in source (a leaked,
# revocable credential). Read it from the environment instead and fail fast
# with a clear message if it is missing.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if not _google_api_key:
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=_google_api_key)
# Load vector store (cached by load_data, so this is cheap after the first run)
vectorstore = load_data()
# Streamlit interface
def format_docs(docs):
    """Join retrieved document chunks into one context string for the prompt."""
    # Hoisted to module level: previously this was redefined on every button click.
    return "\n\n".join(doc.page_content for doc in docs)

st.title("RAG App: Question-Answering with PDFs")
# User input for question
question = st.text_input("Ask a question about the documents:")
if st.button("Submit"):
    if question:
        retriever = vectorstore.as_retriever()
        # Community RAG prompt: fills {context} and {question} slots.
        prompt = hub.pull("rlm/rag-prompt")
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        # Retrieval + generation can take several seconds; show progress.
        with st.spinner("Generating answer..."):
            response = rag_chain.invoke(question)
        st.markdown(response)
    else:
        st.warning("Please enter a question.")