viboognesh committed
Commit 44dcf43
1 Parent(s): 78bdcea

Upload 2 files

Files changed (2)
  1. app.py +47 -0
  2. streamlit_functions.py +72 -0
app.py ADDED
@@ -0,0 +1,47 @@
import streamlit as st
import os
import tempfile

from dotenv import load_dotenv
load_dotenv()

from streamlit_functions import RAGChain

def main():
    st.title("RAG Application")

    if "openai_api_key" not in st.session_state:
        st.session_state.openai_api_key = os.getenv("OPENAI_API_KEY")
    if "RAGChatbot" not in st.session_state:
        st.session_state.RAGChatbot = None

    if not st.session_state.openai_api_key:
        # st.text_input returns "" until the user submits a value,
        # so only store the key once it is non-empty.
        if api_key_input := st.text_input("OpenAI API Key", type="password"):
            st.session_state.openai_api_key = api_key_input
            st.rerun()
    else:
        with st.sidebar:
            if uploaded_file := st.file_uploader("Choose a file", type=["pdf"]):
                # Rebuild the chain only for a newly uploaded file, so that
                # reruns do not re-embed the PDF and wipe the chat history.
                if st.session_state.get("processed_file") != uploaded_file.name:
                    with tempfile.TemporaryDirectory() as tmpdirname:
                        pdf_path = os.path.join(tmpdirname, uploaded_file.name)
                        with open(pdf_path, "wb") as f:
                            f.write(uploaded_file.getbuffer())
                        # RAGChain reads the PDF inside __init__, while the
                        # temporary directory still exists.
                        st.session_state.RAGChatbot = RAGChain(pdf_file_path=pdf_path, api_key=st.session_state.openai_api_key)
                    st.session_state.processed_file = uploaded_file.name

        if st.session_state.RAGChatbot is not None:
            # Replay the stored conversation on every rerun
            for chat_message in st.session_state.RAGChatbot.get_chat_history():
                with st.chat_message("user"):
                    st.write(chat_message["user"])
                with st.chat_message("assistant"):
                    st.write(chat_message["assistant"])
            if user_query := st.chat_input("Ask a question:"):
                with st.chat_message("user"):
                    st.write(user_query)
                with st.spinner("Waiting for response..."):
                    answer, context_list = st.session_state.RAGChatbot.ask_question(user_query)
                with st.chat_message("assistant"):
                    st.write(answer)
                with st.sidebar:
                    st.subheader("Context")
                    for context in context_list:
                        st.write(context)
                        st.write("-" * 25)

if __name__ == "__main__":
    main()
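
To run the app locally, the OpenAI key can either be typed into the password field the app shows on first launch or placed in a `.env` file, which `load_dotenv()` reads at startup. A minimal sketch of the setup, assuming only the `OPENAI_API_KEY` variable the code reads (the key value is a placeholder):

# .env — picked up by load_dotenv()
OPENAI_API_KEY=sk-...

# install the UI dependencies and start the server
pip install streamlit python-dotenv
streamlit run app.py
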
streamlit_functions.py ADDED
@@ -0,0 +1,72 @@
import os

import PyPDF2
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from dotenv import load_dotenv
load_dotenv()

class RAGChain:
    def __init__(self, pdf_file_path, api_key=None):
        # Resolve the key at call time; a default of os.getenv(...) would be
        # evaluated only once, when the class definition is first imported.
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        pdf_text = self.extract_text_from_pdf_with_pypdf2(pdf_file_path)
        chunked_documents = self.create_chunks_for_pypdf2_parse(pdf_text)
        vectorstore = self.create_vectorstore_with_faiss(chunked_documents)
        # Creating LLM
        self.llm = ChatOpenAI(model="gpt-4o-mini", api_key=self.api_key)
        # Creating Retriever from vectorstore
        self.retriever = vectorstore.as_retriever()
        # Chat history stores the conversation as {"user": ..., "assistant": ...} turns
        self.chat_history = []

    def ask_question(self, question):
        if not self.chat_history:
            # Nothing to condense on the first turn
            single_question = question
        else:
            # Condense the last 10 turns plus the new question into a single,
            # self-contained question that will retrieve the relevant context
            chat_history_text = "\n".join(
                f"User:{turn['user']}\nAssistant:{turn['assistant']}"
                for turn in self.chat_history[-10:]
            )
            single_question_prompt = (
                "You will be given chat history and the user question. Your task is to "
                "reply with a single question that accurately represents the user question "
                "based on the context of the chat history. "
                f"\n\nChat history:\n{chat_history_text}\n\nUser question: {question}\n\n"
                "Reply with the single question and nothing else.\n\nSingle Question:"
            )
            single_question = self.llm.invoke(single_question_prompt).content
        # Retrieve the relevant context from the vectorstore
        context_docs = self.retriever.invoke(single_question)
        context = [doc.page_content for doc in context_docs]
        context_text = "\n\n".join(context)
        # Answer the condensed question from the retrieved context
        answer_prompt = (
            "You will be given a context and a question. Your task is to answer the "
            f"question based on the context. \n\nContext:\n{context_text}\n\n"
            f"Question: {single_question}\n\nAnswer:"
        )
        answer = self.llm.invoke(answer_prompt).content
        # Update the chat history and return the answer with its sources
        self.chat_history.append({"user": question, "assistant": answer})
        return answer, context

    def clear_history(self):
        self.chat_history = []

    def get_chat_history(self):
        return self.chat_history

    def extract_text_from_pdf_with_pypdf2(self, file_path):
        pdf_reader = PyPDF2.PdfReader(file_path)
        full_text = ""
        for page in pdf_reader.pages:
            # extract_text() can return None on pages with no extractable text
            full_text += (page.extract_text() or "") + "\n"
        return full_text

    def create_chunks_for_pypdf2_parse(self, pdf_text):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        return text_splitter.split_text(pdf_text)

    def create_vectorstore_with_faiss(self, chunked_documents):
        # Use the same key as the LLM so a key supplied through the UI works here too
        embedding_function = OpenAIEmbeddings(api_key=self.api_key)
        if isinstance(chunked_documents[0], str):
            return FAISS.from_texts(chunked_documents, embedding_function)
        return FAISS.from_documents(chunked_documents, embedding_function)
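
Because RAGChain itself has no Streamlit dependency, it can also be exercised from a plain Python script, which is handy for testing the retrieval step in isolation. A minimal sketch, assuming the packages implied by the imports above are installed and that sample.pdf is a hypothetical local file:

# pip install PyPDF2 langchain-openai langchain-text-splitters langchain-community faiss-cpu python-dotenv
from streamlit_functions import RAGChain

chatbot = RAGChain(pdf_file_path="sample.pdf")  # api_key falls back to OPENAI_API_KEY
answer, context = chatbot.ask_question("What is this document about?")
print(answer)
for chunk in context:  # the chunks the answer was grounded in
    print("-" * 25)
    print(chunk)
chatbot.clear_history()  # start a fresh conversation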