OpenSearch-AI

Running on CPU Upgrade

App Files Files

prasadnu committed on May 19

Commit

0203fd7

1 Parent(s): 59c4f4e

rerank model

Browse files

Files changed (1) hide show

pages/Multimodal_Conversational_Search.py +37 -132

pages/Multimodal_Conversational_Search.py CHANGED Viewed

@@ -34,6 +34,10 @@ st.set_page_config(
     layout="wide",
     page_icon="images/opensearch_mark_default.png"
 )
 parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
 USER_ICON = "images/user.png"
 AI_ICON = "images/opensearch-twitter-card.png"
@@ -150,6 +154,7 @@ if clear:
 def handle_input():
     print("Question: "+st.session_state.input_query)
     print("-----------")
     print("\n\n")
@@ -178,31 +183,6 @@ def handle_input():
         'table':out_['table']
     })
     st.session_state.input_query=""
-# search_type = st.selectbox('Select the Search type',
-#     ('Conversational Search (RAG)',
-#     'OpenSearch vector search',
-#     'LLM Text Generation'
-#     ),
-#     key = 'input_searchType',
-#     help = "Select the type of retriever\n1. Conversational Search (Recommended) - This will include both the OpenSearch and LLM in the retrieval pipeline \n (note: This will put opensearch response as context to LLM to answer) \n2. OpenSearch vector search - This will put only OpenSearch's vector search in the pipeline, \n(Warning: this will lead to unformatted results )\n3. LLM Text Generation - This will include only LLM in the pipeline, \n(Warning: This will give hallucinated and out of context answers_)"
-#     )
-# col1, col2, col3, col4 = st.columns(4)
-# with col1:
-#     st.text_input('Temperature', value = "0.001", placeholder='LLM Temperature', key = 'input_temperature',help = "Set the temperature of the Large Language model. \n Note: 1. Set this to values lower to 1 in the order of 0.001, 0.0001, such low values reduces hallucination and creativity in the LLM response; 2. This applies only when LLM is a part of the retriever pipeline")
-# with col2:
-#     st.number_input('Top K', value = 200, placeholder='Top K', key = 'input_topK', step = 50, help = "This limits the LLM's predictions to the top k most probable tokens at each step of generation, this applies only when LLM is a prt of the retriever pipeline")
-# with col3:
-#     st.number_input('Top P', value = 0.95, placeholder='Top P', key = 'input_topP', step = 0.05, help = "This sets a threshold probability and selects the top tokens whose cumulative probability exceeds the threshold while the tokens are generated by the LLM")
-# with col4:
-#     st.number_input('Max Output Tokens', value = 500, placeholder='Max Output Tokens', key = 'input_maxTokens', step = 100, help = "This decides the total number of tokens generated as the final response. Note: Values greater than 1000 takes longer response time")
-# st.markdown('---')
 def write_user_message(md):
@@ -226,41 +206,6 @@ def render_answer(question,answer,index,res_img):
     with col2:
         ans_ = answer['answer']
         st.write(ans_)
-        # def stream_():
-        #     #use for streaming response on the client side
-        #     for word in ans_.split(" "):
-        #         yield word + " "
-        #         time.sleep(0.04)
-        #     #use for streaming response from Llm directly
-        #     if(isinstance(ans_,botocore.eventstream.EventStream)):
-        #         for event in ans_:
-        #             chunk = event.get('chunk')
-        #             if chunk:
-        #                 chunk_obj = json.loads(chunk.get('bytes').decode())
-        #                 if('content_block' in chunk_obj or ('delta' in chunk_obj and 'text' in chunk_obj['delta'])):
-        #                     key_ = list(chunk_obj.keys())[2]
-        #                     text = chunk_obj[key_]['text']
-        #                     clear_output(wait=True)
-        #                     output.append(text)
-        #                     yield text
-        #                     time.sleep(0.04)
-        # if(index == len(st.session_state.questions_)):
-        #     st.write_stream(stream_)
-        #     if(isinstance(st.session_state.answers_[index-1]['answer'],botocore.eventstream.EventStream)):
-        #         st.session_state.answers_[index-1]['answer'] = "".join(output)
-        # else:
-        #     st.write(ans_)
         polly_response = polly_client.synthesize_speech(VoiceId='Joanna',
                         OutputFormat='ogg_vorbis',
@@ -277,15 +222,13 @@ def render_answer(question,answer,index,res_img):
             st.session_state.maxSimImages = colpali.img_highlight(st.session_state.top_img, st.session_state.query_token_vectors, st.session_state.query_tokens)
             handle_input()
             with placeholder.container():
-                render_all()
         if(st.session_state.input_is_colpali):
             st.button("Show similarity map",key=rdn_key_1,on_click = show_maxsim)
-        #st.markdown("<div style='font-size:18px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;border-radius: 10px;'>"+ans_+"</div>", unsafe_allow_html = True)
-    #st.markdown("<div style='color:#e28743';padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'><b>Relevant images from the document :</b></div>", unsafe_allow_html = True)
-    #st.write("")
     colu1,colu2,colu3 = st.columns([4,82,20])
     with colu2:
         with st.expander("Relevant Sources:"):
@@ -313,11 +256,6 @@ def render_answer(question,answer,index,res_img):
                                     col3_,col4_,col5_ = st.columns([33,33,33])
                                     with col3_:
                                         st.image(res_img[i]['file'])
                         else:
                             if(res_img[i]['file'].lower()!='none' and idx < 1):
                                 col3,col4,col5 = st.columns([33,33,33])
@@ -349,24 +287,24 @@ def render_answer(question,answer,index,res_img):
                               for _ in range(10)])
             currentValue = ''.join(st.session_state.input_rag_searchType)+str(st.session_state.input_is_rerank)+str(st.session_state.input_table_with_sql)+st.session_state.input_index
             oldValue = ''.join(st.session_state.inputs_["rag_searchType"])+str(st.session_state.inputs_["is_rerank"])+str(st.session_state.inputs_["table_with_sql"])+str(st.session_state.inputs_["index"])
-            def on_button_click():
-                if(currentValue!=oldValue or 1==1):
-                    st.session_state.input_query = st.session_state.questions_[-1]["question"]
-                    st.session_state.answers_.pop()
-                    st.session_state.questions_.pop()
-                    handle_input()
-                    with placeholder.container():
-                        render_all()
-            if("currentValue"  in st.session_state):
-                del st.session_state["currentValue"]
-            try:
-                del regenerate
-            except:
-                pass
-            placeholder__ = st.empty()
-            placeholder__.button("🔄",key=rdn_key,on_click=on_button_click)
 #Each answer will have context of the question asked in order to associate the provided feedback with the respective question
@@ -389,7 +327,10 @@ def render_all():
 placeholder = st.empty()
 with placeholder.container():
-  render_all()
 st.markdown("")
 col_2, col_3 = st.columns([75,20])
@@ -425,43 +366,7 @@ with st.sidebar:
         st.markdown("<span style = 'color:#FF9900;'>UK Housing</span> - which city has the highest average housing price in UK ?",unsafe_allow_html=True)
         st.markdown("<span style = 'color:#FF9900;'>Covid19 impacts</span> - How many aged above 85 years died due to covid ?",unsafe_allow_html=True)
-    #st.subheader(":blue[Your multi-modal documents]")
-    # pdf_doc_ = st.file_uploader(
-    #     "Upload your PDFs here and click on 'Process'", accept_multiple_files=False)
-    # pdf_docs = [pdf_doc_]
-    # if st.button("Process"):
-    #     with st.spinner("Processing"):
-    #         if os.path.isdir(parent_dirname+"/pdfs") == False:
-    #             os.mkdir(parent_dirname+"/pdfs")
-    #         for pdf_doc in pdf_docs:
-    #             print(type(pdf_doc))
-    #             pdf_doc_name = (pdf_doc.name).replace(" ","_")
-    #             with open(os.path.join(parent_dirname+"/pdfs",pdf_doc_name),"wb") as f:
-    #                 f.write(pdf_doc.getbuffer())
-    #             request_ = { "bucket": s3_bucket_,"key": pdf_doc_name}
-    #             # if(st.session_state.input_copali_rerank):
-    #             #     copali.process_doc(request_)
-    #             # else:
-    #             rag_DocumentLoader.load_docs(request_)
-    #             print('lambda done')
-    #     st.success('you can start searching on your PDF')
-    ############## haystach demo temporary addition ############
-    # st.subheader(":blue[Multimodality]")
-    # colu1,colu2 = st.columns([50,50])
-    # with colu1:
-    #     in_images = st.toggle('Images', key = 'in_images', disabled = False)
-    # with colu2:
-    #     in_tables = st.toggle('Tables', key = 'in_tables', disabled = False)
-    # if(in_tables):
-    #     st.session_state.input_table_with_sql = True
-    # else:
-    #     st.session_state.input_table_with_sql = False
     ############## haystack demo temporary addition ############
     #if(pdf_doc_ is None or pdf_doc_ == ""):
@@ -473,12 +378,7 @@ with st.sidebar:
         st.session_state.input_index = "2104"
     if(index_select == "UK Housing"):
         st.session_state.input_index = "hpijan2024hometrack"
-    # custom_index = st.text_input("If uploaded the file already, enter the original file name", value = "")
-    # if(custom_index!=""):
-    #     st.session_state.input_index = re.sub('[^A-Za-z0-9]+', '', (custom_index.lower().replace(".pdf","").split("/")[-1].split(".")[0]).lower())
     st.subheader(":blue[Retriever]")
     search_type = st.multiselect('Select the Retriever(s)',
@@ -512,5 +412,10 @@ with st.sidebar:
     with st.expander("Sample questions for Colpali retriever:"):
         st.write("1. Proportion of female new hires 2021-2023? \n\n 2. First-half 2021 return on unlisted real estate investments? \n\n 3. Trend of the fund's expected absolute volatility between January 2014 and January 2016? \n\n 4. Fund return percentage in 2017? \n\n 5. Annualized gross return of the fund from 1997 to 2008?")

     layout="wide",
     page_icon="images/opensearch_mark_default.png"
 )
+if "trigger_search" not in st.session_state:
+    st.session_state.trigger_search = False
 parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
 USER_ICON = "images/user.png"
 AI_ICON = "images/opensearch-twitter-card.png"
 def handle_input():
+    st.session_state.trigger_search = True
     print("Question: "+st.session_state.input_query)
     print("-----------")
     print("\n\n")
         'table':out_['table']
     })
     st.session_state.input_query=""
 def write_user_message(md):
     with col2:
         ans_ = answer['answer']
         st.write(ans_)
         polly_response = polly_client.synthesize_speech(VoiceId='Joanna',
                         OutputFormat='ogg_vorbis',
             st.session_state.maxSimImages = colpali.img_highlight(st.session_state.top_img, st.session_state.query_token_vectors, st.session_state.query_tokens)
             handle_input()
             with placeholder.container():
+                if st.session_state.trigger_search:
+                handle_input()
+                render_all()
+                #render_all()
         if(st.session_state.input_is_colpali):
             st.button("Show similarity map",key=rdn_key_1,on_click = show_maxsim)
     colu1,colu2,colu3 = st.columns([4,82,20])
     with colu2:
         with st.expander("Relevant Sources:"):
                                     col3_,col4_,col5_ = st.columns([33,33,33])
                                     with col3_:
                                         st.image(res_img[i]['file'])
                         else:
                             if(res_img[i]['file'].lower()!='none' and idx < 1):
                                 col3,col4,col5 = st.columns([33,33,33])
                               for _ in range(10)])
             currentValue = ''.join(st.session_state.input_rag_searchType)+str(st.session_state.input_is_rerank)+str(st.session_state.input_table_with_sql)+st.session_state.input_index
             oldValue = ''.join(st.session_state.inputs_["rag_searchType"])+str(st.session_state.inputs_["is_rerank"])+str(st.session_state.inputs_["table_with_sql"])+str(st.session_state.inputs_["index"])
+            # def on_button_click():
+            #     if(currentValue!=oldValue or 1==1):
+            #         st.session_state.input_query = st.session_state.questions_[-1]["question"]
+            #         st.session_state.answers_.pop()
+            #         st.session_state.questions_.pop()
+            #         handle_input()
+            #         with placeholder.container():
+            #             render_all()
+            # if("currentValue"  in st.session_state):
+            #     del st.session_state["currentValue"]
+            # try:
+            #     del regenerate
+            # except:
+            #     pass
+            # placeholder__ = st.empty()
+            # placeholder__.button("🔄",key=rdn_key,on_click=on_button_click)
 #Each answer will have context of the question asked in order to associate the provided feedback with the respective question
 placeholder = st.empty()
 with placeholder.container():
+  if st.session_state.trigger_search:
+    handle_input()
+    render_all()
 st.markdown("")
 col_2, col_3 = st.columns([75,20])
         st.markdown("<span style = 'color:#FF9900;'>UK Housing</span> - which city has the highest average housing price in UK ?",unsafe_allow_html=True)
         st.markdown("<span style = 'color:#FF9900;'>Covid19 impacts</span> - How many aged above 85 years died due to covid ?",unsafe_allow_html=True)
     ############## haystack demo temporary addition ############
     #if(pdf_doc_ is None or pdf_doc_ == ""):
         st.session_state.input_index = "2104"
     if(index_select == "UK Housing"):
         st.session_state.input_index = "hpijan2024hometrack"
     st.subheader(":blue[Retriever]")
     search_type = st.multiselect('Select the Retriever(s)',
     with st.expander("Sample questions for Colpali retriever:"):
         st.write("1. Proportion of female new hires 2021-2023? \n\n 2. First-half 2021 return on unlisted real estate investments? \n\n 3. Trend of the fund's expected absolute volatility between January 2014 and January 2016? \n\n 4. Fund return percentage in 2017? \n\n 5. Annualized gross return of the fund from 1997 to 2008?")
+    run = st.sidebar.button("🔍 Run Search")
+    if run:
+        st.session_state.trigger_search = True