Update app.py
app.py CHANGED
@@ -20,15 +20,6 @@ import torch
 import tqdm
 import accelerate
 
-#Set parameters
-
-llm_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
-list_file_obj = '/home/user/app/pdfs/'
-slider_chunk_size = 1024
-slider_chunk_overlap = 128
-temperature = 0.1
-max_tokens = 6000
-top_k = 3
 
 
 # default_persist_directory = './chroma_HF/'
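The constants deleted here (chunk size and overlap, temperature, max tokens, top-k) reappear in the second hunk below as interactive widgets, so the values now come from the UI instead of module-level globals. For orientation, a minimal sketch of where such parameters typically land in a LangChain RAG pipeline like this one; build_pipeline is a hypothetical name, and the app's real functions are outside this diff:

# Hedged sketch assuming the usual LangChain pattern; not the app's actual code.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import HuggingFaceEndpoint

def build_pipeline(chunk_size=1024, chunk_overlap=128,
                   temperature=0.1, max_tokens=6000, top_k=3):
    # chunk_size / chunk_overlap control how PDF text is split before embedding
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    # temperature / max_tokens / top_k parameterize the HF inference endpoint
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        temperature=temperature, max_new_tokens=max_tokens, top_k=top_k)
    return splitter, llm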
@@ -247,29 +238,64 @@ def demo():
         vector_db = gr.State()
         qa_chain = gr.State()
         collection_name = gr.State()
-
-
-
-
-
-
-
+
+        gr.Markdown(
+        """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
+        <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
+        <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
+        When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
+        <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
+        """)
+        with gr.Tab("Step 1 - Document pre-processing"):
+            with gr.Row():
+                document = gr.Files(value = '/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf', visible=True,
+                    height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
+                # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
+            with gr.Row():
+                db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
+            with gr.Accordion("Advanced options - Document text splitter", open=False):
+                with gr.Row():
+                    slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
+                with gr.Row():
+                    slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
+            with gr.Row():
+                db_progress = gr.Textbox(label="Vector database initialization", value="None")
             with gr.Row():
-
-
+                db_btn = gr.Button("Generate vector database...")
+
+        with gr.Tab("Step 2 - QA chain initialization"):
+            with gr.Row():
+                llm_btn = gr.Radio(list_llm_simple, \
+                    label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
+            with gr.Accordion("Advanced options - LLM model", open=False):
+                with gr.Row():
+                    slider_temperature = gr.Slider(minimum = 0.0, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
+                with gr.Row():
+                    slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
+                with gr.Row():
+                    slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
+            with gr.Row():
+                llm_progress = gr.Textbox(value="None",label="QA chain initialization")
+            with gr.Row():
+                qachain_btn = gr.Button("Initialize question-answering chain...")
+
+        with gr.Tab("Step 3 - Conversation with chatbot"):
+            chatbot = gr.Chatbot(height=300)
+            with gr.Accordion("Advanced - Document references", open=False):
+                with gr.Row():
+                    doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
+                    source1_page = gr.Number(label="Page", scale=1)
+                with gr.Row():
+                    doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
+                    source2_page = gr.Number(label="Page", scale=1)
+                with gr.Row():
+                    doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
+                    source3_page = gr.Number(label="Page", scale=1)
             with gr.Row():
-
-                source2_page = gr.Number(label="Page", scale=1)
+                msg = gr.Textbox(placeholder="Type message", container=True)
             with gr.Row():
-
-
-            with gr.Row():
-                msg = gr.Textbox(placeholder="Type message", container=True)
-            with gr.Row():
-                db_btn = gr.Button("Generate vector database...")
-                qachain_btn = gr.Button("Initialize question-answering chain...")
-                submit_btn = gr.Button("Submit")
-                clear_btn = gr.ClearButton([msg, chatbot])
+                submit_btn = gr.Button("Submit")
+                clear_btn = gr.ClearButton([msg, chatbot])
 
         # Preprocessing events
         #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
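The hunk stops just before the event wiring, at the "# Preprocessing events" comment. Two observations on the added UI code: the name db_btn is bound twice in Step 1 (first to the gr.Radio selector, then to the gr.Button), so the radio is no longer reachable under that name once the button is created; and gr.ClearButton([msg, chatbot]) needs no explicit handler, since it clears its target components itself. For orientation, a hedged sketch of how such controls are typically connected with standard Gradio events; the handler names initialize_database, initialize_LLM, and conversation are assumptions, not code from this commit:

# Hypothetical wiring sketch; handler names are assumptions, not this commit's code.
db_btn.click(initialize_database,
             inputs=[document, slider_chunk_size, slider_chunk_overlap],
             outputs=[vector_db, collection_name, db_progress])
qachain_btn.click(initialize_LLM,
                  inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db],
                  outputs=[qa_chain, llm_progress])
# Both the textbox submit and the button click route through the same chat handler,
# which also fills the document-reference textboxes and page numbers.
msg.submit(conversation,
           inputs=[qa_chain, msg, chatbot],
           outputs=[qa_chain, msg, chatbot, doc_source1, source1_page,
                    doc_source2, source2_page, doc_source3, source3_page])
submit_btn.click(conversation,
                 inputs=[qa_chain, msg, chatbot],
                 outputs=[qa_chain, msg, chatbot, doc_source1, source1_page,
                          doc_source2, source2_page, doc_source3, source3_page])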