singhamal1710 committed
Commit 554d4a4 · verified · 1 Parent(s): 81b03f3

Create app.py

Files changed (1):
  1. app.py  +101  -0
app.py ADDED
@@ -0,0 +1,101 @@
+ import streamlit as st
+ from streamlit_chat import message
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.llms import LlamaCpp
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.memory import ConversationBufferMemory
+ from langchain.document_loaders import PyPDFLoader
+ import os
+ import tempfile
+ from langchain.document_loaders import PyPDFDirectoryLoader
+ from langchain.chains import RetrievalQA
+
+
+ def initialize_session_state():
+     # Seed the Streamlit session state that backs the chat UI.
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hey! 👋"]
+
+
+ def conversation_chat(query, chain, history):
+     # Run one retrieval-augmented Q&A turn and append it to the chat history.
+     result = chain({"question": query, "chat_history": history})
+     history.append((query, result["answer"]))
+     return result["answer"]
+
+
+ def display_chat_history(chain):
+     # Render the chat UI: an input form plus the running transcript.
+     reply_container = st.container()
+     container = st.container()
+
+     with container:
+         with st.form(key='my_form', clear_on_submit=True):
+             user_input = st.text_input("Question:", placeholder="Ask about your PDF", key='input')
+             submit_button = st.form_submit_button(label='Send')
+
+         if submit_button and user_input:
+             with st.spinner('Generating response...'):
+                 output = conversation_chat(user_input, chain, st.session_state['history'])
+
+             st.session_state['past'].append(user_input)
+             st.session_state['generated'].append(output)
+
+     if st.session_state['generated']:
+         with reply_container:
+             for i in range(len(st.session_state['generated'])):
+                 message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                 message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
+
+
+ def create_conversational_chain(vector_store):
+     # Load the quantized Mistral-7B-Instruct model through llama.cpp
+     llm = LlamaCpp(
+         streaming=True,
+         model_path="model/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+         temperature=0.75,
+         top_p=1,
+         verbose=True,
+         n_ctx=4096
+     )
+
+     # Conversation memory so follow-up questions keep their context
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+     # Retrieve the top-2 chunks and "stuff" them into the prompt
+     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
+                                                   retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                                   memory=memory)
+     return chain
+
+
+ def main():
+     # Initialize session state
+     initialize_session_state()
+     st.title("Multi-PDF ChatBot using Mistral-7B-Instruct :books:")
+
+     # Sidebar upload flow (left commented out in this version):
+     # st.sidebar.title("Document Processing")
+     # uploaded_files = st.sidebar.file_uploader("Upload files", accept_multiple_files=True)
+
+     # Load every PDF in the local data_pdf/ directory
+     loader = PyPDFDirectoryLoader("data_pdf/")
+     data = loader.load()
+
+     # Split the extracted text into chunks for easier processing
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=20)
+     text_chunks = text_splitter.split_documents(data)
+
+     # Load the sentence-transformers embedding model
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+     # Embed each text chunk and index the vectors in FAISS
+     vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
+
+     # Create the chain object
+     chain = create_conversational_chain(vector_store)
+
+     display_chat_history(chain)
+
+
+ if __name__ == "__main__":
+     main()
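
Note: the commit imports PyPDFLoader, os, and tempfile and leaves an st.sidebar.file_uploader call commented out in main(), but never connects them. As committed, the app only reads PDFs from the local data_pdf/ directory, expects the quantized model at model/mistral-7b-instruct-v0.2.Q4_K_M.gguf, and is started the usual Streamlit way (streamlit run app.py). Below is a minimal sketch of how the commented-out upload path could be wired to those unused imports; the helper name load_uploaded_pdfs is illustrative and not part of this commit.

import tempfile

import streamlit as st
from langchain.document_loaders import PyPDFLoader


def load_uploaded_pdfs(uploaded_files):
    # Hypothetical helper: write each Streamlit upload to a temp file so PyPDFLoader can read it from disk.
    documents = []
    for uploaded_file in uploaded_files:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name
        documents.extend(PyPDFLoader(tmp_path).load())
    return documents


uploaded_files = st.sidebar.file_uploader("Upload files", type="pdf", accept_multiple_files=True)
if uploaded_files:
    data = load_uploaded_pdfs(uploaded_files)
    # ...then split, embed, and index `data` exactly as main() does for data_pdf/.

The returned documents could then flow through the same RecursiveCharacterTextSplitter / FAISS / create_conversational_chain steps that main() already applies to the directory loader's output.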