Commit 44dcf43
viboognesh committed
Parent(s): 78bdcea

Upload 2 files
- app.py +47 -0
- streamlit_functions.py +72 -0
app.py
ADDED
@@ -0,0 +1,47 @@
import streamlit as st
import os
import tempfile

from dotenv import load_dotenv
load_dotenv()

from streamlit_functions import RAGChain

def main():
    st.title("RAG Application")

    if "openai_api_key" not in st.session_state:
        st.session_state.openai_api_key = os.getenv("OPENAI_API_KEY")
    if "RAGChatbot" not in st.session_state:
        st.session_state.RAGChatbot = None

    if st.session_state.openai_api_key is None:
        # Treat an empty input as missing so the prompt is shown again on rerun
        st.session_state.openai_api_key = st.text_input("OpenAI API Key", type="password") or None
    else:
        with st.sidebar:
            if uploaded_file := st.file_uploader("Choose a file", type=["pdf"]):
                # Streamlit reruns the whole script on every interaction; rebuild the
                # chain only for a new file so the chat history stored inside it survives
                if st.session_state.get("uploaded_file_name") != uploaded_file.name:
                    with tempfile.TemporaryDirectory() as tmpdirname:
                        pdf_path = os.path.join(tmpdirname, uploaded_file.name)
                        with open(pdf_path, "wb") as f:
                            f.write(uploaded_file.getbuffer())
                        st.session_state.RAGChatbot = RAGChain(pdf_file_path=pdf_path, api_key=st.session_state.openai_api_key)
                    st.session_state.uploaded_file_name = uploaded_file.name

        if st.session_state.RAGChatbot is not None:
            for chat_message in st.session_state.RAGChatbot.get_chat_history():
                with st.chat_message("user"):
                    st.write(chat_message["user"])
                with st.chat_message("assistant"):
                    st.write(chat_message["assistant"])
            if user_query := st.chat_input("Ask a question:"):
                with st.chat_message("user"):
                    st.write(user_query)
                with st.spinner("Waiting for response..."):
                    answer, context_list = st.session_state.RAGChatbot.ask_question(user_query)
                with st.chat_message("assistant"):
                    st.write(answer)
                with st.sidebar:
                    st.subheader("Context")
                    for context in context_list:
                        st.write(context)
                        st.write("-" * 25)

if __name__ == "__main__":
    main()
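As an aside, a Space like this also needs its dependencies declared. Below is a plausible requirements.txt inferred from the imports in the two files; the exact package set, and faiss-cpu in particular, are assumptions rather than part of this commit. Locally, the app would then be launched with `streamlit run app.py`.

# Inferred from the imports in app.py and streamlit_functions.py; versions unpinned
streamlit
python-dotenv
PyPDF2
langchain-openai
langchain-text-splitters
langchain-community
faiss-cpu  # assumed backend for the FAISS vectorstore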
streamlit_functions.py
ADDED
@@ -0,0 +1,72 @@
import PyPDF2
import os
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

from dotenv import load_dotenv
load_dotenv()

class RAGChain:
    def __init__(self, pdf_file_path, api_key=None):
        # Fall back to the environment at call time, not at definition time
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        pdf_text = self.extract_text_from_pdf_with_pypdf2(pdf_file_path)
        chunked_documents = self.create_chunks_for_pypdf2_parse(pdf_text)
        vectorstore = self.create_vectorstore_with_faiss(chunked_documents)
        # Create the LLM
        self.llm = ChatOpenAI(model="gpt-4o-mini", api_key=self.api_key)
        # Create a retriever from the vectorstore
        self.retriever = vectorstore.as_retriever()
        # Chat history stores the conversation as {"user": ..., "assistant": ...} dicts
        self.chat_history = []

    def ask_question(self, question):
        # Condense the last 10 turns into text for the rewrite prompt
        # ([-10:], not [:-10], which would drop the most recent turns)
        chat_history_text = "\n".join(f"User:{turn['user']}\nAssistant:{turn['assistant']}" for turn in self.chat_history[-10:])
        # Prompt to rewrite the user question as a single standalone question
        # that will help retrieve relevant context
        single_question_prompt = f"You will be given chat history and the user question. Your task is to reply with a single question that accurately represents the user question based on the context of the chat history.\n\nChat history:\n{chat_history_text}\n\nUser question: {question}\n\nReply with the single question and nothing else.\n\nSingle question:"
        # Use the LLM to create the standalone question
        single_question = self.llm.invoke(single_question_prompt).content
        # Retrieve the relevant context from the vectorstore
        context = self.retriever.invoke(single_question)
        context = [doc.page_content for doc in context]
        context_text = "\n\n".join(context)
        # Prompt to answer the standalone question from the retrieved context
        answer_prompt = f"You will be given a context and a question. Your task is to answer the question based on the context.\n\nContext:\n{context_text}\n\nQuestion: {single_question}\n\nAnswer:"
        # Use the LLM to answer the question
        answer = self.llm.invoke(answer_prompt).content
        # Update the chat history and return the answer with its context
        self.chat_history.append({"user": question, "assistant": answer})
        return answer, context

    def clear_history(self):
        self.chat_history = []

    def get_chat_history(self):
        return self.chat_history

    def extract_text_from_pdf_with_pypdf2(self, file_path):
        pdf_reader = PyPDF2.PdfReader(file_path)
        full_text = ""
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable text
            full_text += (page.extract_text() or "") + "\n"
        return full_text

    def create_chunks_for_pypdf2_parse(self, pdf_text):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = text_splitter.split_text(pdf_text)
        return chunks

    def create_vectorstore_with_faiss(self, chunked_documents):
        # Pass the API key through so a key entered in the UI works for embeddings too
        embedding_function = OpenAIEmbeddings(api_key=self.api_key)
        if isinstance(chunked_documents[0], str):
            vectorstore = FAISS.from_texts(chunked_documents, embedding_function)
        else:
            vectorstore = FAISS.from_documents(chunked_documents, embedding_function)
        # Return the in-memory vectorstore
        return vectorstore
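For reference, a minimal usage sketch of RAGChain outside Streamlit, assuming OPENAI_API_KEY is set in the environment or a .env file; "sample.pdf" is a hypothetical local file used only for illustration.

from streamlit_functions import RAGChain

# Build the chain over a hypothetical local PDF
chain = RAGChain(pdf_file_path="sample.pdf")

# Ask a question; returns the answer plus the retrieved context chunks
answer, context = chain.ask_question("What is this document about?")
print(answer)

# History accumulates as {"user": ..., "assistant": ...} dicts
print(chain.get_chat_history())
chain.clear_history()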