KhacHuy committed on
Commit
6253266
·
verified ·
1 Parent(s): ccdfe75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +301 -301
app.py CHANGED
@@ -1,301 +1,301 @@
1
- # import os
2
- # from PyPDF2 import PdfReader
3
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- # from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- # import streamlit as st
6
- # import google.generativeai as genai
7
- # from langchain.vectorstores import FAISS
8
- # from langchain.prompts import PromptTemplate
9
- # from dotenv import load_dotenv
10
- # from langchain_community.embeddings import SentenceTransformerEmbeddings
11
- # from docx import Document # Thêm import để đọc file docx
12
-
13
- # load_dotenv()
14
-
15
- # genai.configure(api_key="AIzaSyC5hcS1goQ7emeXmyk_7eEQIie7J8OomC4") # Thay YOUR_API_KEY bằng API key của bạn
16
- # model = genai.GenerativeModel('gemini-1.5-flash')
17
-
18
- # # Đọc tất cả PDF và trả về văn bản
19
- # def get_pdf_text(pdf_docs):
20
- # text = ""
21
- # for pdf in pdf_docs:
22
- # pdf_reader = PdfReader(pdf)
23
- # for page in pdf_reader.pages:
24
- # text += page.extract_text() or ""
25
- # return text
26
-
27
- # # Đọc tất cả DOCX và trả về văn bản
28
- # def get_docx_text(docx_docs):
29
- # text = ""
30
- # for doc in docx_docs:
31
- # document = Document(doc)
32
- # for paragraph in document.paragraphs:
33
- # text += paragraph.text # Đảm bảo chuỗi này được đóng đúng cách
34
- # return text
35
-
36
- # # Tách văn bản thành các đoạn
37
- # def get_text_chunks(text):
38
- # splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
39
- # chunks = splitter.split_text(text)
40
- # return chunks
41
-
42
- # # Tạo vector store từ các đoạn văn bản
43
- # def get_vector_store(chunks):
44
- # embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
45
- # vector_store = FAISS.from_texts(chunks, embedding=embeddings)
46
- # vector_store.save_local("faiss_index")
47
-
48
- # # Tạo chuỗi hỏi đáp
49
- # def create_qa_chain(prompt, db):
50
- # def custom_llm(query, context):
51
- # full_prompt = prompt.format(context=context, question=query)
52
- # response = model.generate_content(full_prompt)
53
- # if "Tôi không biết" in response.text:
54
- # response = model.generate_content(query)
55
- # return response.text
56
-
57
- # class CustomRetrievalQA:
58
- # def __init__(self, retriever, prompt):
59
- # self.retriever = retriever
60
- # self.prompt = prompt
61
-
62
- # def invoke(self, inputs):
63
- # query = inputs["query"]
64
- # docs = self.retriever.get_relevant_documents(query)
65
- # context = " ".join([doc.page_content for doc in docs])
66
- # answer = custom_llm(query, context)
67
- # return {"answer": answer}
68
-
69
- # retriever = db.as_retriever(search_kwargs={"k": 3}, max_tokens_limit=6000)
70
- # return CustomRetrievalQA(retriever, prompt)
71
-
72
- # def clear_chat_history():
73
- # st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
74
-
75
- # def user_input(user_question):
76
- # embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
77
- # new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
78
- # retriever = new_db.as_retriever()
79
- # prompt_template = """
80
- # Trả lời câu hỏi chi tiết nhất có thể từ ngữ cảnh được cung cấp. Nếu câu trả lời không nằm trong ngữ cảnh được cung cấp, hãy nói, "câu trả lời không có trong ngữ cảnh".
81
-
82
- # Context:\n {context}\n
83
- # Question: \n{question}\n
84
-
85
- # Trả lời:
86
- # """
87
- # qa_chain = create_qa_chain(prompt_template, new_db)
88
-
89
- # response = qa_chain.invoke({"query": user_question})
90
-
91
- # return {"output_text": [response["answer"]]}
92
-
93
- # def main():
94
- # st.set_page_config(page_title="Gemini PDF & DOC Chatbot", page_icon="🤖")
95
-
96
- # # Sidebar for uploading PDF and DOCX files
97
- # with st.sidebar:
98
- # st.title("Menu:")
99
- # pdf_docs = st.file_uploader("Upload your PDF Files", type=["pdf"], accept_multiple_files=True)
100
- # docx_docs = st.file_uploader("Upload your DOCX Files", type=["docx"], accept_multiple_files=True)
101
-
102
- # if st.button("Submit & Process"):
103
- # with st.spinner("Processing..."):
104
- # raw_text = get_pdf_text(pdf_docs)
105
- # raw_text += get_docx_text(docx_docs) # Kết hợp văn bản từ PDF và DOCX
106
- # if raw_text:
107
- # text_chunks = get_text_chunks(raw_text)
108
- # get_vector_store(text_chunks)
109
- # st.success("Done")
110
- # else:
111
- # st.error("No text extracted from the PDFs or DOCX files.")
112
-
113
- # # Main content area for displaying chat messages
114
- # st.title("Chat with PDF and DOCX files using Gemini🤖")
115
- # st.write("Welcome to the chat!")
116
- # st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
117
-
118
- # # Chat input
119
- # if "messages" not in st.session_state.keys():
120
- # st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
121
-
122
- # for message in st.session_state.messages:
123
- # with st.chat_message(message["role"]):
124
- # st.write(message["content"])
125
-
126
- # if prompt := st.chat_input():
127
- # st.session_state.messages.append({"role": "user", "content": prompt})
128
- # with st.chat_message("user"):
129
- # st.write(prompt)
130
-
131
- # # Display chat messages and bot response
132
- # if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
133
- # with st.chat_message("assistant"):
134
- # with st.spinner("Thinking..."):
135
- # response = user_input(prompt)
136
- # placeholder = st.empty()
137
- # full_response = ''
138
- # for item in response['output_text']:
139
- # full_response += item
140
- # placeholder.markdown(full_response)
141
- # placeholder.markdown(full_response)
142
-
143
- # if full_response:
144
- # message = {"role": "assistant", "content": full_response}
145
- # st.session_state.messages.append(message)
146
-
147
- # if __name__ == "__main__":
148
- # main()
149
-
150
-
151
-
152
-
153
- import os
154
- from PyPDF2 import PdfReader
155
- from langchain.text_splitter import RecursiveCharacterTextSplitter
156
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
157
- import streamlit as st
158
- import google.generativeai as genai
159
- from langchain.vectorstores import FAISS
160
- from langchain.prompts import PromptTemplate
161
- from dotenv import load_dotenv
162
- from langchain_community.embeddings import SentenceTransformerEmbeddings
163
- from docx import Document # Thêm import để đọc file docx
164
-
165
- load_dotenv()
166
-
167
- genai.configure(api_key="AIzaSyC5hcS1goQ7emeXmyk_7eEQIie7J8OomC4") # Thay YOUR_API_KEY bằng API key của bạn
168
- model = genai.GenerativeModel('gemini-1.5-flash')
169
-
170
- # Đọc tất cả PDF và trả về văn bản
171
- def get_pdf_text(pdf_docs):
172
- text = ""
173
- for pdf in pdf_docs:
174
- pdf_reader = PdfReader(pdf)
175
- for page in pdf_reader.pages:
176
- text += page.extract_text() or ""
177
- return text
178
-
179
- # Đọc tất cả DOCX và trả về văn bản
180
- def get_docx_text(docx_docs):
181
- text = ""
182
- for doc in docx_docs:
183
- document = Document(doc)
184
- for paragraph in document.paragraphs:
185
- text += paragraph.text
186
- return text
187
-
188
- # Tách văn bản thành các đoạn
189
- def get_text_chunks(text):
190
- splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
191
- chunks = splitter.split_text(text)
192
- return chunks
193
-
194
- # Tạo vector store từ các đoạn văn bản
195
- def get_vector_store(chunks):
196
- embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
197
- vector_store = FAISS.from_texts(chunks, embedding=embeddings)
198
- vector_store.save_local("faiss_index")
199
-
200
- # Tạo chuỗi hỏi đáp
201
- def create_qa_chain(prompt, db):
202
- def custom_llm(query, context):
203
- full_prompt = prompt.format(context=context, question=query)
204
- response = model.generate_content(full_prompt)
205
- if "Tôi không biết" in response.text:
206
- response = model.generate_content(query)
207
- return response.text
208
-
209
- class CustomRetrievalQA:
210
- def __init__(self, retriever, prompt):
211
- self.retriever = retriever
212
- self.prompt = prompt
213
-
214
- def invoke(self, inputs):
215
- query = inputs["query"]
216
- docs = self.retriever.get_relevant_documents(query)
217
- context = " ".join([doc.page_content for doc in docs])
218
- answer = custom_llm(query, context)
219
- return {"answer": answer}
220
-
221
- retriever = db.as_retriever(search_kwargs={"k": 3}, max_tokens_limit=6000)
222
- return CustomRetrievalQA(retriever, prompt)
223
-
224
- def clear_chat_history():
225
- st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
226
-
227
- def user_input(user_question):
228
- embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
229
- new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
230
- retriever = new_db.as_retriever()
231
- prompt_template = """
232
- Trả lời câu hỏi chi tiết nhất có thể từ ngữ cảnh được cung cấp. Nếu câu trả lời không nằm trong ngữ cảnh được cung cấp, hãy nói, "câu trả lời không có trong ngữ cảnh".
233
-
234
- Context:\n {context}\n
235
- Question: \n{question}\n
236
-
237
- Trả lời:
238
- """
239
- qa_chain = create_qa_chain(prompt_template, new_db)
240
-
241
- response = qa_chain.invoke({"query": user_question})
242
-
243
- return {"output_text": [response["answer"]]}
244
-
245
- def main():
246
- st.set_page_config(page_title="Gemini PDF & DOC Chatbot", page_icon="🤖")
247
-
248
- # Sidebar for uploading PDF and DOCX files
249
- with st.sidebar:
250
- st.title("Menu:")
251
- pdf_docs = st.file_uploader("Upload your PDF Files", type=["pdf"], accept_multiple_files=True)
252
- docx_docs = st.file_uploader("Upload your DOCX Files", type=["docx"], accept_multiple_files=True)
253
-
254
- if st.button("Submit & Process"):
255
- with st.spinner("Processing..."):
256
- raw_text = get_pdf_text(pdf_docs)
257
- raw_text += get_docx_text(docx_docs) # Kết hợp văn bản từ PDF và DOCX
258
- if raw_text:
259
- text_chunks = get_text_chunks(raw_text)
260
- get_vector_store(text_chunks)
261
- st.success(f"Processed {len(pdf_docs)} PDFs and {len(docx_docs)} DOCs.")
262
- else:
263
- st.error("No text extracted from the PDFs or DOCX files.")
264
-
265
- # Main content area for displaying chat messages
266
- st.title("Chat with PDF and DOCX files using Gemini🤖")
267
- st.write("Welcome to the chat!")
268
- st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
269
-
270
- # Chat input
271
- if "messages" not in st.session_state.keys():
272
- st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
273
-
274
- for message in st.session_state.messages:
275
- with st.chat_message(message["role"]):
276
- st.write(message["content"])
277
-
278
- if prompt := st.chat_input():
279
- st.session_state.messages.append({"role": "user", "content": prompt})
280
- with st.chat_message("user"):
281
- st.write(prompt)
282
-
283
- # Display chat messages and bot response
284
- if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
285
- with st.chat_message("assistant"):
286
- with st.spinner("Thinking..."):
287
- response = user_input(prompt)
288
- placeholder = st.empty()
289
- full_response = ''
290
- for item in response['output_text']:
291
- full_response += item
292
- placeholder.markdown(full_response)
293
- placeholder.markdown(full_response)
294
-
295
- if full_response:
296
- message = {"role": "assistant", "content": full_response}
297
- st.session_state.messages.append(message)
298
-
299
- if __name__ == "__main__":
300
- main()
301
-
 
1
+ # import os
2
+ # from PyPDF2 import PdfReader
3
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ # import streamlit as st
6
+ # import google.generativeai as genai
7
+ # from langchain.vectorstores import FAISS
8
+ # from langchain.prompts import PromptTemplate
9
+ # from dotenv import load_dotenv
10
+ # from langchain_community.embeddings import SentenceTransformerEmbeddings
11
+ # from docx import Document # Thêm import để đọc file docx
12
+
13
+ # load_dotenv()
14
+
15
+ # genai.configure(api_key="AIzaSyC5hcS1goQ7emeXmyk_7eEQIie7J8OomC4") # Thay YOUR_API_KEY bằng API key của bạn
16
+ # model = genai.GenerativeModel('gemini-1.5-flash')
17
+
18
+ # # Đọc tất cả PDF và trả về văn bản
19
+ # def get_pdf_text(pdf_docs):
20
+ # text = ""
21
+ # for pdf in pdf_docs:
22
+ # pdf_reader = PdfReader(pdf)
23
+ # for page in pdf_reader.pages:
24
+ # text += page.extract_text() or ""
25
+ # return text
26
+
27
+ # # Đọc tất cả DOCX và trả về văn bản
28
+ # def get_docx_text(docx_docs):
29
+ # text = ""
30
+ # for doc in docx_docs:
31
+ # document = Document(doc)
32
+ # for paragraph in document.paragraphs:
33
+ # text += paragraph.text # Đảm bảo chuỗi này được đóng đúng cách
34
+ # return text
35
+
36
+ # # Tách văn bản thành các đoạn
37
+ # def get_text_chunks(text):
38
+ # splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
39
+ # chunks = splitter.split_text(text)
40
+ # return chunks
41
+
42
+ # # Tạo vector store từ các đoạn văn bản
43
+ # def get_vector_store(chunks):
44
+ # embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
45
+ # vector_store = FAISS.from_texts(chunks, embedding=embeddings)
46
+ # vector_store.save_local("faiss_index")
47
+
48
+ # # Tạo chuỗi hỏi đáp
49
+ # def create_qa_chain(prompt, db):
50
+ # def custom_llm(query, context):
51
+ # full_prompt = prompt.format(context=context, question=query)
52
+ # response = model.generate_content(full_prompt)
53
+ # if "câu trả lời không có trong ngữ cảnh" in response.text:
54
+ # response = model.generate_content(query)
55
+ # return response.text
56
+
57
+ # class CustomRetrievalQA:
58
+ # def __init__(self, retriever, prompt):
59
+ # self.retriever = retriever
60
+ # self.prompt = prompt
61
+
62
+ # def invoke(self, inputs):
63
+ # query = inputs["query"]
64
+ # docs = self.retriever.get_relevant_documents(query)
65
+ # context = " ".join([doc.page_content for doc in docs])
66
+ # answer = custom_llm(query, context)
67
+ # return {"answer": answer}
68
+
69
+ # retriever = db.as_retriever(search_kwargs={"k": 3}, max_tokens_limit=6000)
70
+ # return CustomRetrievalQA(retriever, prompt)
71
+
72
+ # def clear_chat_history():
73
+ # st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
74
+
75
+ # def user_input(user_question):
76
+ # embeddings = SentenceTransformerEmbeddings(model_name="keepitreal/vietnamese-sbert", model_kwargs={"trust_remote_code": True})
77
+ # new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
78
+ # retriever = new_db.as_retriever()
79
+ # prompt_template = """
80
+ # Trả lời câu hỏi chi tiết nhất có thể từ ngữ cảnh được cung cấp. Nếu câu trả lời không nằm trong ngữ cảnh được cung cấp, hãy nói, "câu trả lời không có trong ngữ cảnh".
81
+
82
+ # Context:\n {context}\n
83
+ # Question: \n{question}\n
84
+
85
+ # Trả lời:
86
+ # """
87
+ # qa_chain = create_qa_chain(prompt_template, new_db)
88
+
89
+ # response = qa_chain.invoke({"query": user_question})
90
+
91
+ # return {"output_text": [response["answer"]]}
92
+
93
+ # def main():
94
+ # st.set_page_config(page_title="Gemini PDF & DOC Chatbot", page_icon="🤖")
95
+
96
+ # # Sidebar for uploading PDF and DOCX files
97
+ # with st.sidebar:
98
+ # st.title("Menu:")
99
+ # pdf_docs = st.file_uploader("Upload your PDF Files", type=["pdf"], accept_multiple_files=True)
100
+ # docx_docs = st.file_uploader("Upload your DOCX Files", type=["docx"], accept_multiple_files=True)
101
+
102
+ # if st.button("Submit & Process"):
103
+ # with st.spinner("Processing..."):
104
+ # raw_text = get_pdf_text(pdf_docs)
105
+ # raw_text += get_docx_text(docx_docs) # Kết hợp văn bản từ PDF và DOCX
106
+ # if raw_text:
107
+ # text_chunks = get_text_chunks(raw_text)
108
+ # get_vector_store(text_chunks)
109
+ # st.success("Done")
110
+ # else:
111
+ # st.error("No text extracted from the PDFs or DOCX files.")
112
+
113
+ # # Main content area for displaying chat messages
114
+ # st.title("Chat with PDF and DOCX files using Gemini🤖")
115
+ # st.write("Welcome to the chat!")
116
+ # st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
117
+
118
+ # # Chat input
119
+ # if "messages" not in st.session_state.keys():
120
+ # st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]
121
+
122
+ # for message in st.session_state.messages:
123
+ # with st.chat_message(message["role"]):
124
+ # st.write(message["content"])
125
+
126
+ # if prompt := st.chat_input():
127
+ # st.session_state.messages.append({"role": "user", "content": prompt})
128
+ # with st.chat_message("user"):
129
+ # st.write(prompt)
130
+
131
+ # # Display chat messages and bot response
132
+ # if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
133
+ # with st.chat_message("assistant"):
134
+ # with st.spinner("Thinking..."):
135
+ # response = user_input(prompt)
136
+ # placeholder = st.empty()
137
+ # full_response = ''
138
+ # for item in response['output_text']:
139
+ # full_response += item
140
+ # placeholder.markdown(full_response)
141
+ # placeholder.markdown(full_response)
142
+
143
+ # if full_response:
144
+ # message = {"role": "assistant", "content": full_response}
145
+ # st.session_state.messages.append(message)
146
+
147
+ # if __name__ == "__main__":
148
+ # main()
149
+
150
+
151
+
152
+
153
+ import os
154
+ from PyPDF2 import PdfReader
155
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
156
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
157
+ import streamlit as st
158
+ import google.generativeai as genai
159
+ from langchain.vectorstores import FAISS
160
+ from langchain.prompts import PromptTemplate
161
+ from dotenv import load_dotenv
162
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
163
+ from docx import Document # Thêm import để đọc file docx
164
+
165
# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the Gemini API key from the environment instead of hard-coding a secret
# in source control. Set GOOGLE_API_KEY in the environment or in .env.
# NOTE(review): the key that was previously committed on this line is exposed
# in the repository history and should be revoked.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Shared Gemini model handle used by the question-answering chain.
model = genai.GenerativeModel('gemini-1.5-flash')
169
+
170
# Read every PDF and return its text
def get_pdf_text(pdf_docs):
    """Extract the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``.

    Returns:
        str: The non-empty page texts joined with newlines, or ``""`` when
        no text could be extracted.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            page_text = page.extract_text()
            # Skip pages without extractable text so an all-empty upload
            # still yields "" (callers rely on truthiness of the result).
            if page_text:
                page_texts.append(page_text)
    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next.
    return "\n".join(page_texts)
178
+
179
# Read every DOCX and return its text
def get_docx_text(docx_docs):
    """Extract the paragraph text of the given DOCX documents.

    Args:
        docx_docs: Iterable of DOCX sources, each passed as-is to ``Document``.

    Returns:
        str: The non-empty paragraph texts joined with newlines, or ``""``
        when no text could be extracted.
    """
    paragraph_texts = []
    for doc in docx_docs:
        for paragraph in Document(doc).paragraphs:
            # Empty paragraphs carry no content; dropping them keeps the
            # result "" when nothing was extracted.
            if paragraph.text:
                paragraph_texts.append(paragraph.text)
    # Paragraphs were previously glued together with no separator, which
    # merged sentences and left the text splitter no newline to split on.
    return "\n".join(paragraph_texts)
187
+
188
# Split the text into chunks
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum chunk length handed to the splitter
            (default 10000, the value previously hard-coded).
        chunk_overlap: Overlap between consecutive chunks
            (default 1000, the value previously hard-coded).

    Returns:
        The list of chunks produced by
        ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
193
+
194
# Build a vector store from the text chunks
def get_vector_store(chunks):
    """Embed ``chunks`` and save the resulting FAISS index to "faiss_index".

    Args:
        chunks: Texts to embed and index.
    """
    encoder = SentenceTransformerEmbeddings(
        model_name="keepitreal/vietnamese-sbert",
        model_kwargs={"trust_remote_code": True},
    )
    index = FAISS.from_texts(chunks, embedding=encoder)
    index.save_local("faiss_index")
199
+
200
# Build the question-answering chain
def create_qa_chain(prompt, db, llm=None):
    """Build a small retrieval-augmented question-answering chain.

    Args:
        prompt: Template exposing ``format(context=..., question=...)``,
            for example a plain string with ``{context}`` and ``{question}``.
        db: Vector store exposing ``as_retriever``.
        llm: Optional object exposing ``generate_content(text)`` whose result
            has a ``.text`` attribute. Defaults to the module-level ``model``.

    Returns:
        An object whose ``invoke({"query": ...})`` returns
        ``{"answer": <str>}``.
    """
    # Phrases that mean "the context did not contain the answer". The first is
    # the exact sentence the prompt in user_input tells the model to say; the
    # second is the marker this function checked before. Compared lowercase.
    not_found_markers = (
        "câu trả lời không có trong ngữ cảnh",
        "tôi không biết",
    )

    def custom_llm(query, context):
        # Resolve the default lazily so the module-level model is only
        # needed when no explicit llm was supplied.
        active_llm = model if llm is None else llm
        full_prompt = prompt.format(context=context, question=query)
        response = active_llm.generate_content(full_prompt)
        grounded_answer = response.text.lower()
        if any(marker in grounded_answer for marker in not_found_markers):
            # The documents did not help: ask the model the bare question.
            response = active_llm.generate_content(query)
        return response.text

    class CustomRetrievalQA:
        """Retrieve documents for a query and answer it through the LLM."""

        def __init__(self, retriever, prompt):
            self.retriever = retriever
            self.prompt = prompt

        def invoke(self, inputs):
            """Answer ``inputs["query"]`` and return ``{"answer": text}``."""
            query = inputs["query"]
            docs = self.retriever.get_relevant_documents(query)
            context = " ".join(doc.page_content for doc in docs)
            return {"answer": custom_llm(query, context)}

    # NOTE(review): max_tokens_limit is forwarded unchanged from the original
    # call; confirm that as_retriever actually honours it.
    retriever = db.as_retriever(search_kwargs={"k": 3}, max_tokens_limit=6000)
    return CustomRetrievalQA(retriever, prompt)
223
+
224
def clear_chat_history():
    """Reset the chat transcript to the single initial assistant greeting."""
    greeting = {"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}
    st.session_state.messages = [greeting]
226
+
227
def user_input(user_question):
    """Answer ``user_question`` from the saved FAISS index.

    Args:
        user_question: The question text typed by the user.

    Returns:
        dict: ``{"output_text": [answer]}``. When no index has been saved
        yet, the single item is a hint to upload and process documents.
    """
    index_dir = "faiss_index"
    if not os.path.exists(index_dir):
        # Asking before "Submit & Process" used to crash inside
        # FAISS.load_local; return a readable hint instead.
        return {"output_text": ["Please upload and process your PDF or DOCX files before asking a question."]}

    embeddings = SentenceTransformerEmbeddings(
        model_name="keepitreal/vietnamese-sbert",
        model_kwargs={"trust_remote_code": True},
    )
    # NOTE(review): allow_dangerous_deserialization=True is only acceptable
    # because the index is written by this app (get_vector_store). Do not
    # point this at an index from an untrusted source.
    new_db = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
    prompt_template = """
    Trả lời câu hỏi chi tiết nhất có thể từ ngữ cảnh được cung cấp. Nếu câu trả lời không nằm trong ngữ cảnh được cung cấp, hãy nói, "câu trả lời không có trong ngữ cảnh".

    Context:\n {context}\n
    Question: \n{question}\n

    Trả lời:
    """
    qa_chain = create_qa_chain(prompt_template, new_db)
    response = qa_chain.invoke({"query": user_question})
    return {"output_text": [response["answer"]]}
244
+
245
def main():
    """Render the Streamlit app: an upload sidebar plus a chat interface.

    The sidebar collects PDF and DOCX files and, on "Submit & Process",
    extracts their text, chunks it and saves a vector index. The main area
    replays the chat history stored in ``st.session_state.messages`` and
    answers any user message that has no assistant reply yet.
    """
    st.set_page_config(page_title="Gemini PDF & DOC Chatbot", page_icon="🤖")

    # Sidebar for uploading PDF and DOCX files
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files", type=["pdf"], accept_multiple_files=True)
        docx_docs = st.file_uploader("Upload your DOCX Files", type=["docx"], accept_multiple_files=True)

        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                # Combine PDF and DOCX text. The newline keeps the last PDF
                # word from fusing with the first DOCX word; empty parts are
                # dropped so "nothing extracted" still yields "".
                extracted = (get_pdf_text(pdf_docs), get_docx_text(docx_docs))
                raw_text = "\n".join(part for part in extracted if part)
                if raw_text:
                    get_vector_store(get_text_chunks(raw_text))
                    st.success(f"Processed {len(pdf_docs)} PDFs and {len(docx_docs)} DOCs.")
                else:
                    st.error("No text extracted from the PDFs or DOCX files.")

    # Main content area for displaying chat messages
    st.title("Chat with PDF and DOCX files using Gemini🤖")
    st.write("Welcome to the chat!")
    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

    # Seed the transcript on the first run of a session
    if "messages" not in st.session_state:
        st.session_state.messages = [{"role": "assistant", "content": "Upload some PDFs or DOCs and ask me a question."}]

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    # Chat input
    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

    # Answer the latest user message if it has no assistant reply yet
    if st.session_state.messages and st.session_state.messages[-1]["role"] != "assistant":
        # Use the stored message rather than `prompt`: on a rerun that was
        # not triggered by the chat box, `prompt` is None while a user
        # message may still be waiting for its answer.
        question = st.session_state.messages[-1]["content"]
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = user_input(question)
                placeholder = st.empty()
                full_response = "".join(response["output_text"])
                placeholder.markdown(full_response)

        # Kept inside the guard so full_response is always bound here
        if full_response:
            st.session_state.messages.append({"role": "assistant", "content": full_response})


if __name__ == "__main__":
    main()
301
+