vishwask committed on
Commit
2c86e3f
·
verified ·
1 Parent(s): 73b18f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -29
app.py CHANGED
@@ -20,15 +20,6 @@ import torch
20
  import tqdm
21
  import accelerate
22
 
23
- #Set parameters
24
-
25
- llm_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
26
- list_file_obj = '/home/user/app/pdfs/'
27
- slider_chunk_size = 1024
28
- slider_chunk_overlap = 128
29
- temperature = 0.1
30
- max_tokens = 6000
31
- top_k = 3
32
 
33
 
34
  # default_persist_directory = './chroma_HF/'
@@ -247,29 +238,64 @@ def demo():
247
  vector_db = gr.State()
248
  qa_chain = gr.State()
249
  collection_name = gr.State()
250
-
251
-
252
- document = gr.Files(value = '/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf',height=100,
253
- file_count="multiple", file_types=["pdf"], interactive=True, visible=False,
254
- label="Upload your PDF documents (single or multiple)")
255
- chatbot = gr.Chatbot(height=300)
256
- with gr.Accordion("Advanced - Document references", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  with gr.Row():
258
- doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
259
- source1_page = gr.Number(label="Page", scale=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  with gr.Row():
261
- doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
262
- source2_page = gr.Number(label="Page", scale=1)
263
  with gr.Row():
264
- doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
265
- source3_page = gr.Number(label="Page", scale=1)
266
- with gr.Row():
267
- msg = gr.Textbox(placeholder="Type message", container=True)
268
- with gr.Row():
269
- db_btn = gr.Button("Generate vector database...")
270
- qachain_btn = gr.Button("Initialize question-answering chain...")
271
- submit_btn = gr.Button("Submit")
272
- clear_btn = gr.ClearButton([msg, chatbot])
273
 
274
  # Preprocessing events
275
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
 
20
  import tqdm
21
  import accelerate
22
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  # default_persist_directory = './chroma_HF/'
 
238
  vector_db = gr.State()
239
  qa_chain = gr.State()
240
  collection_name = gr.State()
241
+
242
+ gr.Markdown(
243
+ """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
244
+ <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
245
+ <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
246
+ When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
247
+ <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
248
+ """)
249
+ with gr.Tab("Step 1 - Document pre-processing"):
250
+ with gr.Row():
251
+ document = gr.Files(value = '/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf',visible=True,
252
+ height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
253
+ # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
254
+ with gr.Row():
255
+ db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
256
+ with gr.Accordion("Advanced options - Document text splitter", open=False):
257
+ with gr.Row():
258
+ slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
259
+ with gr.Row():
260
+ slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
261
+ with gr.Row():
262
+ db_progress = gr.Textbox(label="Vector database initialization", value="None")
263
  with gr.Row():
264
+ db_btn = gr.Button("Generate vector database...")
265
+
266
+ with gr.Tab("Step 2 - QA chain initialization"):
267
+ with gr.Row():
268
+ llm_btn = gr.Radio(list_llm_simple, \
269
+ label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
270
+ with gr.Accordion("Advanced options - LLM model", open=False):
271
+ with gr.Row():
272
+ slider_temperature = gr.Slider(minimum = 0.0, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
273
+ with gr.Row():
274
+ slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
275
+ with gr.Row():
276
+ slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
277
+ with gr.Row():
278
+ llm_progress = gr.Textbox(value="None",label="QA chain initialization")
279
+ with gr.Row():
280
+ qachain_btn = gr.Button("Initialize question-answering chain...")
281
+
282
+ with gr.Tab("Step 3 - Conversation with chatbot"):
283
+ chatbot = gr.Chatbot(height=300)
284
+ with gr.Accordion("Advanced - Document references", open=False):
285
+ with gr.Row():
286
+ doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
287
+ source1_page = gr.Number(label="Page", scale=1)
288
+ with gr.Row():
289
+ doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
290
+ source2_page = gr.Number(label="Page", scale=1)
291
+ with gr.Row():
292
+ doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
293
+ source3_page = gr.Number(label="Page", scale=1)
294
  with gr.Row():
295
+ msg = gr.Textbox(placeholder="Type message", container=True)
 
296
  with gr.Row():
297
+ submit_btn = gr.Button("Submit")
298
+ clear_btn = gr.ClearButton([msg, chatbot])
 
 
 
 
 
 
 
299
 
300
  # Preprocessing events
301
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])