prasadnu commited on
Commit
0203fd7
·
1 Parent(s): 59c4f4e

rerank model

Browse files
pages/Multimodal_Conversational_Search.py CHANGED
@@ -34,6 +34,10 @@ st.set_page_config(
34
  layout="wide",
35
  page_icon="images/opensearch_mark_default.png"
36
  )
 
 
 
 
37
  parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
38
  USER_ICON = "images/user.png"
39
  AI_ICON = "images/opensearch-twitter-card.png"
@@ -150,6 +154,7 @@ if clear:
150
 
151
 
152
  def handle_input():
 
153
  print("Question: "+st.session_state.input_query)
154
  print("-----------")
155
  print("\n\n")
@@ -178,31 +183,6 @@ def handle_input():
178
  'table':out_['table']
179
  })
180
  st.session_state.input_query=""
181
-
182
-
183
-
184
- # search_type = st.selectbox('Select the Search type',
185
- # ('Conversational Search (RAG)',
186
- # 'OpenSearch vector search',
187
- # 'LLM Text Generation'
188
- # ),
189
-
190
- # key = 'input_searchType',
191
- # help = "Select the type of retriever\n1. Conversational Search (Recommended) - This will include both the OpenSearch and LLM in the retrieval pipeline \n (note: This will put opensearch response as context to LLM to answer) \n2. OpenSearch vector search - This will put only OpenSearch's vector search in the pipeline, \n(Warning: this will lead to unformatted results )\n3. LLM Text Generation - This will include only LLM in the pipeline, \n(Warning: This will give hallucinated and out of context answers_)"
192
- # )
193
-
194
- # col1, col2, col3, col4 = st.columns(4)
195
-
196
- # with col1:
197
- # st.text_input('Temperature', value = "0.001", placeholder='LLM Temperature', key = 'input_temperature',help = "Set the temperature of the Large Language model. \n Note: 1. Set this to values lower to 1 in the order of 0.001, 0.0001, such low values reduces hallucination and creativity in the LLM response; 2. This applies only when LLM is a part of the retriever pipeline")
198
- # with col2:
199
- # st.number_input('Top K', value = 200, placeholder='Top K', key = 'input_topK', step = 50, help = "This limits the LLM's predictions to the top k most probable tokens at each step of generation, this applies only when LLM is a prt of the retriever pipeline")
200
- # with col3:
201
- # st.number_input('Top P', value = 0.95, placeholder='Top P', key = 'input_topP', step = 0.05, help = "This sets a threshold probability and selects the top tokens whose cumulative probability exceeds the threshold while the tokens are generated by the LLM")
202
- # with col4:
203
- # st.number_input('Max Output Tokens', value = 500, placeholder='Max Output Tokens', key = 'input_maxTokens', step = 100, help = "This decides the total number of tokens generated as the final response. Note: Values greater than 1000 takes longer response time")
204
-
205
- # st.markdown('---')
206
 
207
 
208
  def write_user_message(md):
@@ -226,41 +206,6 @@ def render_answer(question,answer,index,res_img):
226
  with col2:
227
  ans_ = answer['answer']
228
  st.write(ans_)
229
-
230
-
231
-
232
- # def stream_():
233
- # #use for streaming response on the client side
234
- # for word in ans_.split(" "):
235
- # yield word + " "
236
- # time.sleep(0.04)
237
- # #use for streaming response from Llm directly
238
- # if(isinstance(ans_,botocore.eventstream.EventStream)):
239
- # for event in ans_:
240
- # chunk = event.get('chunk')
241
-
242
- # if chunk:
243
-
244
- # chunk_obj = json.loads(chunk.get('bytes').decode())
245
-
246
- # if('content_block' in chunk_obj or ('delta' in chunk_obj and 'text' in chunk_obj['delta'])):
247
- # key_ = list(chunk_obj.keys())[2]
248
- # text = chunk_obj[key_]['text']
249
-
250
- # clear_output(wait=True)
251
- # output.append(text)
252
- # yield text
253
- # time.sleep(0.04)
254
-
255
-
256
-
257
- # if(index == len(st.session_state.questions_)):
258
- # st.write_stream(stream_)
259
- # if(isinstance(st.session_state.answers_[index-1]['answer'],botocore.eventstream.EventStream)):
260
- # st.session_state.answers_[index-1]['answer'] = "".join(output)
261
- # else:
262
- # st.write(ans_)
263
-
264
 
265
  polly_response = polly_client.synthesize_speech(VoiceId='Joanna',
266
  OutputFormat='ogg_vorbis',
@@ -277,15 +222,13 @@ def render_answer(question,answer,index,res_img):
277
  st.session_state.maxSimImages = colpali.img_highlight(st.session_state.top_img, st.session_state.query_token_vectors, st.session_state.query_tokens)
278
  handle_input()
279
  with placeholder.container():
280
- render_all()
 
 
 
281
  if(st.session_state.input_is_colpali):
282
  st.button("Show similarity map",key=rdn_key_1,on_click = show_maxsim)
283
 
284
-
285
-
286
- #st.markdown("<div style='font-size:18px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;border-radius: 10px;'>"+ans_+"</div>", unsafe_allow_html = True)
287
- #st.markdown("<div style='color:#e28743';padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'><b>Relevant images from the document :</b></div>", unsafe_allow_html = True)
288
- #st.write("")
289
  colu1,colu2,colu3 = st.columns([4,82,20])
290
  with colu2:
291
  with st.expander("Relevant Sources:"):
@@ -313,11 +256,6 @@ def render_answer(question,answer,index,res_img):
313
  col3_,col4_,col5_ = st.columns([33,33,33])
314
  with col3_:
315
  st.image(res_img[i]['file'])
316
-
317
-
318
-
319
-
320
-
321
  else:
322
  if(res_img[i]['file'].lower()!='none' and idx < 1):
323
  col3,col4,col5 = st.columns([33,33,33])
@@ -349,24 +287,24 @@ def render_answer(question,answer,index,res_img):
349
  for _ in range(10)])
350
  currentValue = ''.join(st.session_state.input_rag_searchType)+str(st.session_state.input_is_rerank)+str(st.session_state.input_table_with_sql)+st.session_state.input_index
351
  oldValue = ''.join(st.session_state.inputs_["rag_searchType"])+str(st.session_state.inputs_["is_rerank"])+str(st.session_state.inputs_["table_with_sql"])+str(st.session_state.inputs_["index"])
352
- def on_button_click():
353
- if(currentValue!=oldValue or 1==1):
354
- st.session_state.input_query = st.session_state.questions_[-1]["question"]
355
- st.session_state.answers_.pop()
356
- st.session_state.questions_.pop()
357
 
358
- handle_input()
359
- with placeholder.container():
360
- render_all()
361
- if("currentValue" in st.session_state):
362
- del st.session_state["currentValue"]
363
-
364
- try:
365
- del regenerate
366
- except:
367
- pass
368
- placeholder__ = st.empty()
369
- placeholder__.button("🔄",key=rdn_key,on_click=on_button_click)
370
 
371
 
372
  #Each answer will have context of the question asked in order to associate the provided feedback with the respective question
@@ -389,7 +327,10 @@ def render_all():
389
 
390
  placeholder = st.empty()
391
  with placeholder.container():
392
- render_all()
 
 
 
393
 
394
  st.markdown("")
395
  col_2, col_3 = st.columns([75,20])
@@ -425,43 +366,7 @@ with st.sidebar:
425
  st.markdown("<span style = 'color:#FF9900;'>UK Housing</span> - which city has the highest average housing price in UK ?",unsafe_allow_html=True)
426
  st.markdown("<span style = 'color:#FF9900;'>Covid19 impacts</span> - How many aged above 85 years died due to covid ?",unsafe_allow_html=True)
427
 
428
-
429
- #st.subheader(":blue[Your multi-modal documents]")
430
- # pdf_doc_ = st.file_uploader(
431
- # "Upload your PDFs here and click on 'Process'", accept_multiple_files=False)
432
-
433
-
434
- # pdf_docs = [pdf_doc_]
435
- # if st.button("Process"):
436
- # with st.spinner("Processing"):
437
- # if os.path.isdir(parent_dirname+"/pdfs") == False:
438
- # os.mkdir(parent_dirname+"/pdfs")
439
-
440
- # for pdf_doc in pdf_docs:
441
- # print(type(pdf_doc))
442
- # pdf_doc_name = (pdf_doc.name).replace(" ","_")
443
- # with open(os.path.join(parent_dirname+"/pdfs",pdf_doc_name),"wb") as f:
444
- # f.write(pdf_doc.getbuffer())
445
-
446
- # request_ = { "bucket": s3_bucket_,"key": pdf_doc_name}
447
- # # if(st.session_state.input_copali_rerank):
448
- # # copali.process_doc(request_)
449
- # # else:
450
- # rag_DocumentLoader.load_docs(request_)
451
- # print('lambda done')
452
- # st.success('you can start searching on your PDF')
453
-
454
- ############## haystach demo temporary addition ############
455
- # st.subheader(":blue[Multimodality]")
456
- # colu1,colu2 = st.columns([50,50])
457
- # with colu1:
458
- # in_images = st.toggle('Images', key = 'in_images', disabled = False)
459
- # with colu2:
460
- # in_tables = st.toggle('Tables', key = 'in_tables', disabled = False)
461
- # if(in_tables):
462
- # st.session_state.input_table_with_sql = True
463
- # else:
464
- # st.session_state.input_table_with_sql = False
465
 
466
  ############## haystach demo temporary addition ############
467
  #if(pdf_doc_ is None or pdf_doc_ == ""):
@@ -473,12 +378,7 @@ with st.sidebar:
473
  st.session_state.input_index = "2104"
474
  if(index_select == "UK Housing"):
475
  st.session_state.input_index = "hpijan2024hometrack"
476
-
477
- # custom_index = st.text_input("If uploaded the file already, enter the original file name", value = "")
478
- # if(custom_index!=""):
479
- # st.session_state.input_index = re.sub('[^A-Za-z0-9]+', '', (custom_index.lower().replace(".pdf","").split("/")[-1].split(".")[0]).lower())
480
-
481
-
482
 
483
  st.subheader(":blue[Retriever]")
484
  search_type = st.multiselect('Select the Retriever(s)',
@@ -512,5 +412,10 @@ with st.sidebar:
512
  with st.expander("Sample questions for Colpali retriever:"):
513
  st.write("1. Proportion of female new hires 2021-2023? \n\n 2. First-half 2021 return on unlisted real estate investments? \n\n 3. Trend of the fund's expected absolute volatility between January 2014 and January 2016? \n\n 4. Fund return percentage in 2017? \n\n 5. Annualized gross return of the fund from 1997 to 2008?")
514
 
 
 
 
 
515
 
 
516
 
 
34
  layout="wide",
35
  page_icon="images/opensearch_mark_default.png"
36
  )
37
+ if "trigger_search" not in st.session_state:
38
+ st.session_state.trigger_search = False
39
+
40
+
41
  parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
42
  USER_ICON = "images/user.png"
43
  AI_ICON = "images/opensearch-twitter-card.png"
 
154
 
155
 
156
  def handle_input():
157
+ st.session_state.trigger_search = True
158
  print("Question: "+st.session_state.input_query)
159
  print("-----------")
160
  print("\n\n")
 
183
  'table':out_['table']
184
  })
185
  st.session_state.input_query=""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
 
188
  def write_user_message(md):
 
206
  with col2:
207
  ans_ = answer['answer']
208
  st.write(ans_)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  polly_response = polly_client.synthesize_speech(VoiceId='Joanna',
211
  OutputFormat='ogg_vorbis',
 
222
  st.session_state.maxSimImages = colpali.img_highlight(st.session_state.top_img, st.session_state.query_token_vectors, st.session_state.query_tokens)
223
  handle_input()
224
  with placeholder.container():
225
+ if st.session_state.trigger_search:
226
+ handle_input()
227
+ render_all()
228
+ #render_all()
229
  if(st.session_state.input_is_colpali):
230
  st.button("Show similarity map",key=rdn_key_1,on_click = show_maxsim)
231
 
 
 
 
 
 
232
  colu1,colu2,colu3 = st.columns([4,82,20])
233
  with colu2:
234
  with st.expander("Relevant Sources:"):
 
256
  col3_,col4_,col5_ = st.columns([33,33,33])
257
  with col3_:
258
  st.image(res_img[i]['file'])
 
 
 
 
 
259
  else:
260
  if(res_img[i]['file'].lower()!='none' and idx < 1):
261
  col3,col4,col5 = st.columns([33,33,33])
 
287
  for _ in range(10)])
288
  currentValue = ''.join(st.session_state.input_rag_searchType)+str(st.session_state.input_is_rerank)+str(st.session_state.input_table_with_sql)+st.session_state.input_index
289
  oldValue = ''.join(st.session_state.inputs_["rag_searchType"])+str(st.session_state.inputs_["is_rerank"])+str(st.session_state.inputs_["table_with_sql"])+str(st.session_state.inputs_["index"])
290
+ # def on_button_click():
291
+ # if(currentValue!=oldValue or 1==1):
292
+ # st.session_state.input_query = st.session_state.questions_[-1]["question"]
293
+ # st.session_state.answers_.pop()
294
+ # st.session_state.questions_.pop()
295
 
296
+ # handle_input()
297
+ # with placeholder.container():
298
+ # render_all()
299
+ # if("currentValue" in st.session_state):
300
+ # del st.session_state["currentValue"]
301
+
302
+ # try:
303
+ # del regenerate
304
+ # except:
305
+ # pass
306
+ # placeholder__ = st.empty()
307
+ # placeholder__.button("🔄",key=rdn_key,on_click=on_button_click)
308
 
309
 
310
  #Each answer will have context of the question asked in order to associate the provided feedback with the respective question
 
327
 
328
  placeholder = st.empty()
329
  with placeholder.container():
330
+ if st.session_state.trigger_search:
331
+ handle_input()
332
+ render_all()
333
+
334
 
335
  st.markdown("")
336
  col_2, col_3 = st.columns([75,20])
 
366
  st.markdown("<span style = 'color:#FF9900;'>UK Housing</span> - which city has the highest average housing price in UK ?",unsafe_allow_html=True)
367
  st.markdown("<span style = 'color:#FF9900;'>Covid19 impacts</span> - How many aged above 85 years died due to covid ?",unsafe_allow_html=True)
368
 
369
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
  ############## haystach demo temporary addition ############
372
  #if(pdf_doc_ is None or pdf_doc_ == ""):
 
378
  st.session_state.input_index = "2104"
379
  if(index_select == "UK Housing"):
380
  st.session_state.input_index = "hpijan2024hometrack"
381
+
 
 
 
 
 
382
 
383
  st.subheader(":blue[Retriever]")
384
  search_type = st.multiselect('Select the Retriever(s)',
 
412
  with st.expander("Sample questions for Colpali retriever:"):
413
  st.write("1. Proportion of female new hires 2021-2023? \n\n 2. First-half 2021 return on unlisted real estate investments? \n\n 3. Trend of the fund's expected absolute volatility between January 2014 and January 2016? \n\n 4. Fund return percentage in 2017? \n\n 5. Annualized gross return of the fund from 1997 to 2008?")
414
 
415
+ run = st.sidebar.button("🔍 Run Search")
416
+
417
+ if run:
418
+ st.session_state.trigger_search = True
419
 
420
+
421