Nectar777 committed on
Commit
c2cfea0
·
1 Parent(s): be20a5b

new commit

Browse files
Files changed (2) hide show
  1. app.py +130 -0
  2. requirements.txt +23 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
3
+ from langchain.chains.combine_documents import create_stuff_documents_chain
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_community.chat_message_histories import ChatMessageHistory
6
+ from langchain_core.chat_history import BaseChatMessageHistory
7
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
8
+ from langchain_groq import ChatGroq
9
+ from langchain_core.runnables.history import RunnableWithMessageHistory
10
+ from langchain_huggingface import HuggingFaceEmbeddings
11
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
12
+ from langchain_community.document_loaders import PyPDFLoader
13
+ import os
14
+
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+
18
## Page header shown at the top of the Streamlit app
st.title("RAG-based Conversational Chatbot")
st.write("Upload PDFs and chat with their content")

## Credentials: both keys are collected through masked (password-type) inputs.
## Each value is an empty string until the user types something.
groq_api_key = st.text_input(label="Enter your Groq API key:", type="password")
hf_api_key = st.text_input(label="Enter your Hugging Face API key:", type="password")
25
+
26
def _load_pdf_documents(uploaded_files):
    """Parse every uploaded PDF into LangChain documents.

    Each upload is written to its own temporary file because ``PyPDFLoader``
    is given a filesystem path. The file is removed as soon as it has been
    parsed, so nothing is left on disk and two uploads (or two sessions) can
    no longer overwrite one shared ``./temp.pdf``.

    Args:
        uploaded_files: The uploads returned by ``st.file_uploader``; each
            item must provide ``getvalue()`` and ``name``.

    Returns:
        A flat list containing the documents loaded from all uploads.
    """
    import tempfile  # function-scope import: only this helper needs it

    documents = []
    for uploaded_file in uploaded_files:
        # delete=False so the file can be reopened by path after the handle
        # is closed; it is removed explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        try:
            docs = PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)
        # Label documents with the uploaded file name instead of the
        # throwaway temporary path.
        for doc in docs:
            doc.metadata["source"] = uploaded_file.name
        documents.extend(docs)
    return documents


def get_session_history(session: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session``, creating it on first use.

    Histories live in ``st.session_state.store`` so they survive Streamlit
    reruns. The lookup is keyed on the ``session`` argument (the id handed in
    by ``RunnableWithMessageHistory``) rather than on a variable captured
    from the enclosing script.

    Args:
        session: Identifier of the conversation.

    Returns:
        The ``ChatMessageHistory`` associated with ``session``.
    """
    store = st.session_state.store
    if session not in store:
        store[session] = ChatMessageHistory()
    return store[session]


## Check if both API keys are provided
if groq_api_key and hf_api_key:
    os.environ['HF_TOKEN'] = hf_api_key
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")

    ## Chat interface
    session_id = st.text_input("Session ID", value="default_session")

    ## Statefully manage chat history
    if 'store' not in st.session_state:
        st.session_state.store = {}

    uploaded_files = st.file_uploader("Choose a PDF file", type="pdf", accept_multiple_files=True)

    ## Process uploaded PDFs
    if uploaded_files:
        documents = _load_pdf_documents(uploaded_files)

        # Split the loaded documents into overlapping chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
        splits = text_splitter.split_documents(documents)

        if not splits:
            # Nothing to index (e.g. the PDFs contain no extractable text);
            # end this script run here instead of building an empty index.
            st.warning("No extractable text was found in the uploaded PDFs.")
            st.stop()

        # Index the chunks produced above, not the unsplit documents
        vectorstore = FAISS.from_documents(splits, embeddings)

        retriever = vectorstore.as_retriever()

        # Prompt used to rewrite the latest question into a standalone one
        # before retrieval.
        # NOTE(review): the leading "out of context" note asks for an answer
        # while the rest says "Do not provide an answer" - confirm intent.
        contextualize_q_system_prompt = ("""
        Note: this is very important and high priority, If the human prompt is looking for an answer which is out of
        context given, clearly state that "you don't know and tell it's out of context".
        You are provided with a chat history and the latest user question,
        which may refer to previous messages. Your task is to rewrite the
        latest user question into a standalone question that does not rely
        on prior context for understanding. Ensure the reformulated question
        is clear and concise. If no rephrasing is needed, return the question
        unchanged. Do not provide an answer.
        """)

        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

        # Answer question
        system_prompt = """
        You are an assistant specialized in answering questions.
        Utilize the provided retrieved context to formulate your response.
        Note: this is very important and high priority, If the human prompt is looking for an answer which is out of
        context given, clearly state that "you don't know and tell it's out of context".

        {context}
        """

        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

        # Wrap the RAG chain so the per-session history is read and updated
        # around every invocation.
        conversational_rag_chain = RunnableWithMessageHistory(
            rag_chain, get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer"
        )

        user_input = st.text_input("Your question:")
        if user_input:
            session_history = get_session_history(session_id)
            response = conversational_rag_chain.invoke(
                {"input": user_input},
                config={
                    "configurable": {"session_id": session_id}
                },
            )
            st.write(st.session_state.store)
            st.write("Assistant:", response['answer'])
            st.write("Chat History:", session_history.messages)
else:
    st.warning("Please enter both the Groq API Key and Hugging Face API Key.")
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ ipykernel
3
+ langchain_community
4
+ python-dotenv
5
+ pypdf
6
+ bs4
7
+ arxiv
8
+ pymupdf
9
+ wikipedia
10
+ lxml
11
+ langchain-openai
12
+ langchain-text-splitters
13
+ chromadb
14
+ sentence_transformers
15
+ langchain_huggingface
16
+ faiss-cpu
17
+ langchain_chroma
18
+ langchain-groq
19
+ fastapi
20
+ uvicorn
21
+ langserve
22
+ streamlit
23
+ langchain-core