prasadnu committed
Commit eb03410 · 1 Parent(s): 7862398

rerank model

RAG/bedrock_agent.py CHANGED
@@ -23,8 +23,6 @@ if "inputs_" not in st.session_state:
 
 parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
 region = 'us-east-1'
-print(region)
-account_id = '445083327804'
 # setting logger
 logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -46,9 +44,6 @@ def delete_memory():
     )
 
 def query_(inputs):
-    ## create a random id for session initiator id
-
-
     # invoke the agent API
     agentResponse = bedrock_agent_runtime_client.invoke_agent(
         inputText=inputs['shopping_query'],
@@ -71,13 +66,6 @@ def query_(inputs):
     for event in event_stream:
         print("***event*********")
         print(event)
-        # if 'chunk' in event:
-        #     data = event['chunk']['bytes']
-        #     print("***chunk*********")
-        #     print(data)
-        #     logger.info(f"Final answer ->\n{data.decode('utf8')}")
-        #     agent_answer_ = data.decode('utf8')
-        #     print(agent_answer_)
         if 'trace' in event:
             print("trace*****total*********")
             print(event['trace'])
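
Note on the deleted chunk handler: the final answer still has to come out of the same stream. A minimal sketch of the usual pattern, assuming the boto3 bedrock-agent-runtime client (variable names follow the surrounding code; 'completion' is the key boto3 documents for the invoke_agent response stream):

# Hedged sketch, not the repo's exact code: read the agent's final answer
# and traces from an invoke_agent event stream.
agent_answer = ""
for event in agentResponse['completion']:
    if 'chunk' in event:
        # chunk bytes carry fragments of the final answer text
        agent_answer += event['chunk']['bytes'].decode('utf8')
    elif 'trace' in event:
        # trace events expose intermediate reasoning and tool calls
        logger.info(json.dumps(event['trace'], indent=2))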
@@ -109,38 +97,7 @@ def query_(inputs):
             print(total_context)
     except botocore.exceptions.EventStreamError as error:
         raise error
-        # t.sleep(2)
-        # query_(st.session_state.inputs_)
-
-        # if 'chunk' in event:
-        #     data = event['chunk']['bytes']
-        #     final_ans = data.decode('utf8')
-        #     print(f"Final answer ->\n{final_ans}")
-        #     logger.info(f"Final answer ->\n{final_ans}")
-        #     agent_answer = final_ans
-        #     end_event_received = True
-        #     # End event indicates that the request finished successfully
-        # elif 'trace' in event:
-        #     logger.info(json.dumps(event['trace'], indent=2))
-        # else:
-        #     raise Exception("unexpected event.", event)
-        # except Exception as e:
-        #     raise Exception("unexpected event.", e)
+
     return {'text':agent_answer,'source':total_context,'last_tool':{'name':last_tool_name,'response':last_tool}}
 
-    ####### Re-Rank ########
-
-    #print("re-rank")
-
-    # if(st.session_state.input_is_rerank == True and len(total_context)):
-    #     ques = [{"question":question}]
-    #     ans = [{"answer":total_context}]
-
-    #     total_context = re_ranker.re_rank('rag','Cross Encoder',"",ques, ans)
 
-    # llm_prompt = prompt_template.format(context=total_context[0],question=question)
-    # output = invoke_models.invoke_llm_model( "\n\nHuman: {input}\n\nAssistant:".format(input=llm_prompt) ,False)
-    # #print(output)
-    # if(len(images_2)==0):
-    #     images_2 = images
-    # return {'text':output,'source':total_context,'image':images_2,'table':df}
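
The commented-out "Re-Rank" block deleted here (and its twin in rag_DocumentSearcher.py below) pointed at a cross-encoder pass over the retrieved context via re_ranker.re_rank('rag','Cross Encoder',...). That helper's internals are not part of this commit, but the idea is sketched below, assuming a sentence-transformers cross-encoder; the model name is an illustrative choice, not necessarily the repo's:

# Hedged sketch of cross-encoder re-ranking of retrieved passages.
from sentence_transformers import CrossEncoder

def re_rank_context(question, passages):
    # score each (question, passage) pair, then sort passages by relevance
    model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    scores = model.predict([(question, passage) for passage in passages])
    ranked = sorted(zip(passages, scores), key=lambda pair: pair[1], reverse=True)
    return [passage for passage, _ in ranked]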
 
RAG/rag_DocumentSearcher.py CHANGED
@@ -49,7 +49,6 @@ def query_(awsauth,inputs, session_id,search_types):
     images = []
 
     for hit in hits:
-        #context.append(hit['_source']['caption'])
         images.append({'file':hit['_source']['image'],'caption':hit['_source']['processed_element']})
 
     ####### SEARCH ########
@@ -102,10 +101,6 @@ def query_(awsauth,inputs, session_id,search_types):
         }
     ]
 
-
-
-
-
     SIZE = 5
 
     hybrid_payload = {
@@ -159,7 +154,6 @@ def query_(awsauth,inputs, session_id,search_types):
 
     if('Sparse Search' in search_types):
 
-        #print("text expansion is enabled")
         sparse_payload = { "neural_sparse": {
             "processed_element_embedding_sparse": {
                 "query_text": question,
@@ -301,7 +295,6 @@ def query_(awsauth,inputs, session_id,search_types):
             images_2.append({'file':hit["_source"]["image"],'caption':hit["_source"]["processed_element"]})
 
         idx = idx +1
-        #images.append(hit['_source']['image'])
 
     # if(is_table_in_result == False):
     #     df = lazy_get_table()
@@ -315,19 +308,9 @@ def query_(awsauth,inputs, session_id,search_types):
 
     total_context = context_tables + context
 
-    ####### Re-Rank ########
-
-    #print("re-rank")
-
-    # if(st.session_state.input_is_rerank == True and len(total_context)):
-    #     ques = [{"question":question}]
-    #     ans = [{"answer":total_context}]
-
-    #     total_context = re_ranker.re_rank('rag','Cross Encoder',"",ques, ans)
 
     llm_prompt = prompt_template.format(context=total_context[0],question=question)
     output = invoke_models.invoke_llm_model( "\n\nHuman: {input}\n\nAssistant:".format(input=llm_prompt) ,False)
-    #print(output)
     if(len(images_2)==0):
         images_2 = images
     return {'text':output,'source':total_context,'image':images_2,'table':df}
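
For reference on the sparse path kept above: neural_sparse is an OpenSearch query clause, so the full request body around it would look roughly like this (the field name comes from the diff; the model id is a placeholder for whatever sparse encoder is deployed, and the client variable is assumed):

# Hedged sketch: a complete neural sparse search request built around the
# clause shown in the diff.
sparse_query = {
    "size": 5,
    "query": {
        "neural_sparse": {
            "processed_element_embedding_sparse": {
                "query_text": question,
                "model_id": "<deployed-sparse-encoder-id>"
            }
        }
    }
}
# hits = os_client.search(index=index_name, body=sparse_query)["hits"]["hits"]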
 
app.py CHANGED
@@ -152,28 +152,6 @@ spacer_col = st.columns(1)[0]
 with spacer_col:
     st.markdown("<div style='height: 120px;'></div>", unsafe_allow_html=True)
 
-#st.image("/home/ubuntu/images/OS_AI_1.png", use_column_width=True)
-# with col_title:
-#     st.write("")
-#     st.markdown('<div class="title">OpenSearch AI demos</div>', unsafe_allow_html=True)
-
-# def demo_link_block(icon, title, target_page):
-#     st.markdown(f"""
-#         <a href="/{target_page}" target="_self" style="text-decoration: none;">
-#             <div class="demo-card">
-#                 <div class="demo-text">
-#                     <span>{icon} {title}</span>
-#                     <span class="demo-arrow">→</span>
-#                 </div>
-#             </div>
-#         </a>
-#     """, unsafe_allow_html=True)
-
-
-# st.write("")
-# demo_link_block("🔍", "AI Search", "Semantic_Search")
-# demo_link_block("💬","Multimodal Conversational Search", "Multimodal_Conversational_Search")
-# demo_link_block("🛍️","Agentic Shopping Assistant", "AI_Shopping_Assistant")
 
 
 col1, col2, col3 = st.columns(3)
@@ -225,5 +203,3 @@ st.markdown("""
     </style>
     """, unsafe_allow_html=True)
 
-    # <div class="card-arrow"></div>
-
 
pages/AI_Shopping_Assistant.py CHANGED
@@ -33,12 +33,7 @@ import bedrock_agent
 import warnings
 
 warnings.filterwarnings("ignore", category=DeprecationWarning)
-
-
-
-
 st.set_page_config(
-    #page_title="Semantic Search using OpenSearch",
     layout="wide",
     page_icon="images/opensearch_mark_default.png"
 )
@@ -47,15 +42,14 @@ USER_ICON = "images/user.png"
 AI_ICON = "images/opensearch-twitter-card.png"
 REGENERATE_ICON = "images/regenerate.png"
 s3_bucket_ = "pdf-repo-uploads"
-#"pdf-repo-uploads"
+
 polly_client = boto3.Session(
     region_name='us-east-1').client('polly')
 
 # Check if the user ID is already stored in the session state
 if 'user_id' in st.session_state:
     user_id = st.session_state['user_id']
-    #print(f"User ID: {user_id}")
-
+
 # If the user ID is not yet stored in the session state, generate a random UUID
 else:
     user_id = str(uuid.uuid4())
@@ -79,9 +73,6 @@ if "questions__" not in st.session_state:
 
 if "answers__" not in st.session_state:
     st.session_state.answers__ = []
-
-if "input_index" not in st.session_state:
-    st.session_state.input_index = "hpijan2024hometrack"#"globalwarmingnew"#"hpijan2024hometrack_no_img_no_table"
 
 if "input_is_rerank" not in st.session_state:
     st.session_state.input_is_rerank = True
@@ -92,22 +83,17 @@ if "input_copali_rerank" not in st.session_state:
 if "input_table_with_sql" not in st.session_state:
     st.session_state.input_table_with_sql = False
 
-
 if "inputs_" not in st.session_state:
     st.session_state.inputs_ = {}
 
 if "input_shopping_query" not in st.session_state:
-    st.session_state.input_shopping_query="get me shoes suitable for trekking"#"What is the projected energy percentage from renewable sources in future?"#"Which city in United Kingdom has the highest average housing price ?"#"How many aged above 85 years died due to covid ?"# What is the projected energy from renewable sources ?"
+    st.session_state.input_shopping_query="get me shoes suitable for trekking"
 
 
 if "input_rag_searchType" not in st.session_state:
     st.session_state.input_rag_searchType = ["Sparse Search"]
 
-
-
-
 region = 'us-east-1'
-#bedrock_runtime_client = boto3.client('bedrock-runtime',region_name=region)
 output = []
 service = 'es'
 
@@ -122,48 +108,6 @@ st.markdown("""
     </style>
     """,unsafe_allow_html=True)
 
-################ OpenSearch Py client #####################
-
-# credentials = boto3.Session().get_credentials()
-# awsauth = AWSV4SignerAuth(credentials, region, service)
-
-# ospy_client = OpenSearch(
-#     hosts = [{'host': 'search-opensearchservi-75ucark0bqob-bzk6r6h2t33dlnpgx2pdeg22gi.us-east-1.es.amazonaws.com', 'port': 443}],
-#     http_auth = awsauth,
-#     use_ssl = True,
-#     verify_certs = True,
-#     connection_class = RequestsHttpConnection,
-#     pool_maxsize = 20
-# )
-
-################# using boto3 credentials ###################
-
-
-# credentials = boto3.Session().get_credentials()
-# awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)
-# service = 'es'
-
-
-################# using boto3 credentials ####################
-
-
-
-# if "input_searchType" not in st.session_state:
-#     st.session_state.input_searchType = "Conversational Search (RAG)"
-
-# if "input_temperature" not in st.session_state:
-#     st.session_state.input_temperature = "0.001"
-
-# if "input_topK" not in st.session_state:
-#     st.session_state.input_topK = 200
-
-# if "input_topP" not in st.session_state:
-#     st.session_state.input_topP = 0.95
-
-# if "input_maxTokens" not in st.session_state:
-#     st.session_state.input_maxTokens = 1024
-
-
 def write_logo():
     col1, col2, col3 = st.columns([5, 1, 5])
     with col2:
@@ -175,8 +119,6 @@ def write_top_bar():
     st.page_link("app.py", label=":orange[Home]", icon="🏠")
     st.header("AI Shopping assistant",divider='rainbow')
 
-    #st.image(AI_ICON, use_column_width='always')
-
     with col2:
         st.write("")
         st.write("")
@@ -193,17 +135,10 @@ if clear:
     st.session_state.input_shopping_query=""
     st.session_state.session_id_ = str(uuid.uuid1())
     bedrock_agent.delete_memory()
-    # st.session_state.input_searchType="Conversational Search (RAG)"
-    # st.session_state.input_temperature = "0.001"
-    # st.session_state.input_topK = 200
-    # st.session_state.input_topP = 0.95
-    # st.session_state.input_maxTokens = 1024
+
 
 
 def handle_input():
-    print("Question: "+st.session_state.input_shopping_query)
-    print("-----------")
-    print("\n\n")
    if(st.session_state.input_shopping_query==''):
        return ""
    inputs = {}
@@ -212,10 +147,6 @@ def handle_input():
         inputs[key.removeprefix('input_')] = st.session_state[key]
     st.session_state.inputs_ = inputs
 
-    #######
-
-
-    #st.write(inputs)
     question_with_id = {
         'question': inputs["shopping_query"],
         'id': len(st.session_state.questions__)
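
A side note on the surviving input-collection idiom above: every session key prefixed with input_ is copied into a plain dict with the prefix stripped (str.removeprefix needs Python 3.9+). In isolation, with a stand-in dict for st.session_state:

# Hedged sketch of the handle_input() idiom.
session_state = {
    'input_shopping_query': 'get me shoes suitable for trekking',
    'input_rag_searchType': ['Sparse Search'],
    'user_id': 'ignored',                    # no 'input_' prefix, skipped
}
inputs = {}
for key in session_state:
    if key.startswith('input_'):
        inputs[key.removeprefix('input_')] = session_state[key]
# inputs == {'shopping_query': '...', 'rag_searchType': ['Sparse Search']}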
@@ -234,30 +165,6 @@ def handle_input():
     st.session_state.input_shopping_query=""
 
 
-
-# search_type = st.selectbox('Select the Search type',
-#     ('Conversational Search (RAG)',
-#     'OpenSearch vector search',
-#     'LLM Text Generation'
-#     ),
-
-#     key = 'input_searchType',
-#     help = "Select the type of retriever\n1. Conversational Search (Recommended) - This will include both the OpenSearch and LLM in the retrieval pipeline \n (note: This will put opensearch response as context to LLM to answer) \n2. OpenSearch vector search - This will put only OpenSearch's vector search in the pipeline, \n(Warning: this will lead to unformatted results )\n3. LLM Text Generation - This will include only LLM in the pipeline, \n(Warning: This will give hallucinated and out of context answers_)"
-#     )
-
-# col1, col2, col3, col4 = st.columns(4)
-
-# with col1:
-#     st.text_input('Temperature', value = "0.001", placeholder='LLM Temperature', key = 'input_temperature',help = "Set the temperature of the Large Language model. \n Note: 1. Set this to values lower to 1 in the order of 0.001, 0.0001, such low values reduces hallucination and creativity in the LLM response; 2. This applies only when LLM is a part of the retriever pipeline")
-# with col2:
-#     st.number_input('Top K', value = 200, placeholder='Top K', key = 'input_topK', step = 50, help = "This limits the LLM's predictions to the top k most probable tokens at each step of generation, this applies only when LLM is a prt of the retriever pipeline")
-# with col3:
-#     st.number_input('Top P', value = 0.95, placeholder='Top P', key = 'input_topP', step = 0.05, help = "This sets a threshold probability and selects the top tokens whose cumulative probability exceeds the threshold while the tokens are generated by the LLM")
-# with col4:
-#     st.number_input('Max Output Tokens', value = 500, placeholder='Max Output Tokens', key = 'input_maxTokens', step = 100, help = "This decides the total number of tokens generated as the final response. Note: Values greater than 1000 takes longer response time")
-
-# st.markdown('---')
-
 
 def write_user_message(md):
     col1, col2 = st.columns([3,97])
@@ -265,8 +172,6 @@ def write_user_message(md):
     with col1:
         st.image(USER_ICON, use_column_width='always')
     with col2:
-        #st.warning(md['question'])
-
         st.markdown("<div style='color:#e28743';font-size:18px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;font-style: italic;'>"+md['question']+"</div>", unsafe_allow_html = True)
 
 
@@ -283,18 +188,9 @@ def render_answer(question,answer,index):
     ans_ = answer['answer']
     span_ans = ans_.replace('<question>',"<span style='fontSize:18px;color:#f37709;fontStyle:italic;'>").replace("</question>","</span>")
     st.markdown("<p>"+span_ans+"</p>",unsafe_allow_html = True)
-    print("answer['source']")
-    print("-------------")
-    print(answer['source'])
-    print("-------------")
-    print(answer['last_tool'])
     if(answer['last_tool']['name'] in ["generate_images","get_relevant_items_for_image","get_relevant_items_for_text","retrieve_with_hybrid_search","retrieve_with_keyword_search","get_any_general_recommendation"]):
         use_interim_results = True
         src_dict =json.loads(answer['last_tool']['response'].replace("'",'"'))
-        print("src_dict")
-        print("-------------")
-        print(src_dict)
-        #if("get_relevant_items_for_text" in src_dict):
         if(use_interim_results and answer['last_tool']['name']!= 'generate_images' and answer['last_tool']['name']!= 'get_any_general_recommendation'):
             key_ = answer['last_tool']['name']
 
@@ -310,9 +206,7 @@ def render_answer(question,answer,index):
                 if(index ==1):
                     with img_col2:
                         st.image(resizedImg,use_column_width = True,caption = item['title'])
-                #st.image(parent_dirname+"/retrieved_esci_images/"+item['id']+"_resized.jpg",caption = item['title'],use_column_width = True)
-
-
+
     if(answer['last_tool']['name'] == "generate_images" or answer['last_tool']['name'] == "get_any_general_recommendation"):
         st.write("<br>",unsafe_allow_html = True)
         gen_img_col1, gen_img_col2,gen_img_col2 = st.columns([30,30,30])
@@ -328,143 +222,17 @@ def render_answer(question,answer,index):
         with gen_img_col1:
             st.image(resizedImg,caption = "Generated image for "+key.split(".")[0],use_column_width = True)
         st.write("<br>",unsafe_allow_html = True)
-
-
-
-
-
-
-
-    # def stream_():
-    #     #use for streaming response on the client side
-    #     for word in ans_.split(" "):
-    #         yield word + " "
-    #         time.sleep(0.04)
-    #     #use for streaming response from Llm directly
-    #     if(isinstance(ans_,botocore.eventstream.EventStream)):
-    #         for event in ans_:
-    #             chunk = event.get('chunk')
-
-    #             if chunk:
-
-    #                 chunk_obj = json.loads(chunk.get('bytes').decode())
-
-    #                 if('content_block' in chunk_obj or ('delta' in chunk_obj and 'text' in chunk_obj['delta'])):
-    #                     key_ = list(chunk_obj.keys())[2]
-    #                     text = chunk_obj[key_]['text']
-
-    #                     clear_output(wait=True)
-    #                     output.append(text)
-    #                     yield text
-    #                     time.sleep(0.04)
-
-
-
-    # if(index == len(st.session_state.questions_)):
-    #     st.write_stream(stream_)
-    # if(isinstance(st.session_state.answers_[index-1]['answer'],botocore.eventstream.EventStream)):
-    #     st.session_state.answers_[index-1]['answer'] = "".join(output)
-    # else:
-    #     st.write(ans_)
-
-
-    # polly_response = polly_client.synthesize_speech(VoiceId='Joanna',
-    #             OutputFormat='ogg_vorbis',
-    #             Text = ans_,
-    #             Engine = 'neural')
-
-    # audio_col1, audio_col2 = st.columns([50,50])
-    # with audio_col1:
-    #     st.audio(polly_response['AudioStream'].read(), format="audio/ogg")
-
-
-
-    #st.markdown("<div style='font-size:18px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;border-radius: 10px;'>"+ans_+"</div>", unsafe_allow_html = True)
-    #st.markdown("<div style='color:#e28743';padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'><b>Relevant images from the document :</b></div>", unsafe_allow_html = True)
-    #st.write("")
     colu1,colu2,colu3 = st.columns([4,82,20])
     if(answer['source']!={}):
         with colu2:
             with st.expander("Agent Traces:"):
                 st.write(answer['source'])
+
-    # with st.container():
-    #     if(len(res_img)>0):
-    #         with st.expander("Images:"):
-    #             col3,col4,col5 = st.columns([33,33,33])
-    #             cols = [col3,col4]
-    #             idx = 0
-    #             #print(res_img)
-    #             for img_ in res_img:
-    #                 if(img_['file'].lower()!='none' and idx < 2):
-    #                     img = img_['file'].split(".")[0]
-    #                     caption = img_['caption']
-
-    #                     with cols[idx]:
-
-    #                         st.image(parent_dirname+"/figures/"+st.session_state.input_index+"/"+img+".jpg")
-    #                         #st.write(caption)
-    #                     idx = idx+1
-    # #st.markdown("<div style='color:#e28743';padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'><b>Sources from the document:</b></div>", unsafe_allow_html = True)
-    # if(len(answer["table"] )>0):
-    #     with st.expander("Table:"):
-    #         df = pd.read_csv(answer["table"][0]['name'],skipinitialspace = True, on_bad_lines='skip',delimiter='`')
-    #         df.fillna(method='pad', inplace=True)
-    #         st.table(df)
-    # with st.expander("Raw sources:"):
-    #     st.write(answer["source"])
-
-
-
-    # with col_3:
-
-    #     #st.markdown("<div style='color:#e28743;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 5px;'><b>"+",".join(st.session_state.input_rag_searchType)+"</b></div>", unsafe_allow_html = True)
-
-
-
-    #     if(index == len(st.session_state.questions_)):
-
-    #         rdn_key = ''.join([random.choice(string.ascii_letters)
-    #                         for _ in range(10)])
-    #         currentValue = ''.join(st.session_state.input_rag_searchType)+str(st.session_state.input_is_rerank)+str(st.session_state.input_table_with_sql)+st.session_state.input_index
-    #         oldValue = ''.join(st.session_state.inputs_["rag_searchType"])+str(st.session_state.inputs_["is_rerank"])+str(st.session_state.inputs_["table_with_sql"])+str(st.session_state.inputs_["index"])
-    #         #print("changing values-----------------")
-    #         def on_button_click():
-    #             # print("button clicked---------------")
-    #             # print(currentValue)
-    #             # print(oldValue)
-    #             if(currentValue!=oldValue or 1==1):
-    #                 #print("----------regenerate----------------")
-    #                 st.session_state.input_query = st.session_state.questions_[-1]["question"]
-    #                 st.session_state.answers_.pop()
-    #                 st.session_state.questions_.pop()
-
-    #                 handle_input()
-    #                 with placeholder.container():
-    #                     render_all()
-
-    #         if("currentValue" in st.session_state):
-    #             del st.session_state["currentValue"]
-
-    #         try:
-    #             del regenerate
-    #         except:
-    #             pass
-
-    #         #print("------------------------")
-    #         #print(st.session_state)
-
-    #         placeholder__ = st.empty()
-
-    #         placeholder__.button("🔄",key=rdn_key,on_click=on_button_click)
 
 #Each answer will have context of the question asked in order to associate the provided feedback with the respective question
 def write_chat_message(md, q,index):
-    #res_img = md['image']
-    #st.session_state['session_id'] = res['session_id'] to be added in memory
     chat = st.container()
     with chat:
-        #print("st.session_state.input_index------------------")
-        #print(st.session_state.input_index)
         render_answer(q,md,index)
 
 def render_all():
@@ -480,173 +248,8 @@ with placeholder.container():
 
     st.markdown("")
     col_2, col_3 = st.columns([75,20])
-    #col_1, col_2, col_3 = st.columns([7.5,71.5,22])
-    # with col_1:
-    #     st.markdown("<p style='padding:0px 0px 0px 0px; color:#FF9900;font-size:120%'><b>Ask:</b></p>",unsafe_allow_html=True, help = 'Enter the questions and click on "GO"')
-
+
     with col_2:
-        #st.markdown("")
         input = st.text_input( "Ask here",label_visibility = "collapsed",key="input_shopping_query")
     with col_3:
-        #hidden = st.button("RUN",disabled=True,key = "hidden")
-        # audio_value = st.audio_input("Record a voice message")
-        # print(audio_value)
         play = st.button("Go",on_click=handle_input,key = "play")
-#with st.sidebar:
-    # st.page_link("/home/ubuntu/AI-search-with-amazon-opensearch-service/OpenSearchApp/app.py", label=":orange[Home]", icon="🏠")
-    # st.subheader(":blue[Sample Data]")
-    # coln_1,coln_2 = st.columns([70,30])
-    # # index_select = st.radio("Choose one index",["UK Housing","Covid19 impacts on Ireland","Environmental Global Warming","BEIR Research"],
-    # #     captions = ['[preview](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/HPI-Jan-2024-Hometrack.pdf)',
-    # #     '[preview](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/covid19_ie.pdf)',
-    # #     '[preview](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/global_warming.pdf)',
-    # #     '[preview](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/BEIR.pdf)'],
-    # #     key="input_rad_index")
-    # with coln_1:
-    #     index_select = st.radio("Choose one index",["UK Housing","Global Warming stats","Covid19 impacts on Ireland"],key="input_rad_index")
-    # with coln_2:
-    #     st.markdown("<p style='font-size:15px'>Preview file</p>",unsafe_allow_html=True)
-    #     st.write("[:eyes:](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/HPI-Jan-2024-Hometrack.pdf)")
-    #     st.write("[:eyes:](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/global_warming.pdf)")
-    #     st.write("[:eyes:](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/covid19_ie.pdf)")
-    #     #st.write("[:eyes:](https://github.com/aws-samples/AI-search-with-amazon-opensearch-service/blob/b559f82c07dfcca973f457c0a15d6444752553ab/rag/sample_pdfs/BEIR.pdf)")
-    # st.markdown("""
-    #     <style>
-    #     [data-testid=column]:nth-of-type(2) [data-testid=stVerticalBlock]{
-    #         gap: 0rem;
-    #     }
-    #     [data-testid=column]:nth-of-type(1) [data-testid=stVerticalBlock]{
-    #         gap: 0rem;
-    #     }
-    #     </style>
-    #     """,unsafe_allow_html=True)
-    # # Initialize boto3 to use the S3 client.
-    # s3_client = boto3.resource('s3')
-    # bucket=s3_client.Bucket(s3_bucket_)
-
-    # objects = bucket.objects.filter(Prefix="sample_pdfs/")
-    # urls = []
-
-    # client = boto3.client('s3')
-
-    # for obj in objects:
-    #     if obj.key.endswith('.pdf'):
-
-    #         # Generate the S3 presigned URL
-    #         s3_presigned_url = client.generate_presigned_url(
-    #             ClientMethod='get_object',
-    #             Params={
-    #                 'Bucket': s3_bucket_,
-    #                 'Key': obj.key
-    #             },
-    #             ExpiresIn=3600
-    #         )
-
-    #         # Print the created S3 presigned URL
-    #         print(s3_presigned_url)
-    #         urls.append(s3_presigned_url)
-    #         #st.write("["+obj.key.split('/')[1]+"]("+s3_presigned_url+")")
-    #         st.link_button(obj.key.split('/')[1], s3_presigned_url)
-
-
-    # st.subheader(":blue[Your multi-modal documents]")
-    # pdf_doc_ = st.file_uploader(
-    #     "Upload your PDFs here and click on 'Process'", accept_multiple_files=False)
-
-
-    # pdf_docs = [pdf_doc_]
-    # if st.button("Process"):
-    #     with st.spinner("Processing"):
-    #         if os.path.isdir(parent_dirname+"/pdfs") == False:
-    #             os.mkdir(parent_dirname+"/pdfs")
-
-    #         for pdf_doc in pdf_docs:
-    #             print(type(pdf_doc))
-    #             pdf_doc_name = (pdf_doc.name).replace(" ","_")
-    #             with open(os.path.join(parent_dirname+"/pdfs",pdf_doc_name),"wb") as f:
-    #                 f.write(pdf_doc.getbuffer())
-
-    #             request_ = { "bucket": s3_bucket_,"key": pdf_doc_name}
-    #             # if(st.session_state.input_copali_rerank):
-    #             #     copali.process_doc(request_)
-    #             # else:
-    #             rag_DocumentLoader.load_docs(request_)
-    #             print('lambda done')
-    #         st.success('you can start searching on your PDF')
-
-    # ############## haystach demo temporary addition ############
-    # # st.subheader(":blue[Multimodality]")
-    # # colu1,colu2 = st.columns([50,50])
-    # # with colu1:
-    # #     in_images = st.toggle('Images', key = 'in_images', disabled = False)
-    # # with colu2:
-    # #     in_tables = st.toggle('Tables', key = 'in_tables', disabled = False)
-    # # if(in_tables):
-    # #     st.session_state.input_table_with_sql = True
-    # # else:
-    # #     st.session_state.input_table_with_sql = False
-
-    # ############## haystach demo temporary addition ############
-    # if(pdf_doc_ is None or pdf_doc_ == ""):
-    #     if(index_select == "Global Warming stats"):
-    #         st.session_state.input_index = "globalwarmingnew"
-    #     if(index_select == "Covid19 impacts on Ireland"):
-    #         st.session_state.input_index = "covid19ie"#"choosetheknnalgorithmforyourbillionscaleusecasewithopensearchawsbigdatablog"
-    #     if(index_select == "BEIR"):
-    #         st.session_state.input_index = "2104"
-    #     if(index_select == "UK Housing"):
-    #         st.session_state.input_index = "hpijan2024hometrack"
-    #     # if(in_images == True and in_tables == True):
-    #     #     st.session_state.input_index = "hpijan2024hometrack"
-    #     # else:
-    #     #     if(in_images == True and in_tables == False):
-    #     #         st.session_state.input_index = "hpijan2024hometrackno_table"
-    #     #     else:
-    #     #         if(in_images == False and in_tables == True):
-    #     #             st.session_state.input_index = "hpijan2024hometrackno_images"
-    #     #         else:
-    #     #             st.session_state.input_index = "hpijan2024hometrack_no_img_no_table"
-
-
-    # # if(in_images):
-    # #     st.session_state.input_include_images = True
-    # # else:
-    # #     st.session_state.input_include_images = False
-    # # if(in_tables):
-    # #     st.session_state.input_include_tables = True
-    # # else:
-    # #     st.session_state.input_include_tables = False
-
-    # custom_index = st.text_input("If uploaded the file already, enter the original file name", value = "")
-    # if(custom_index!=""):
-    #     st.session_state.input_index = re.sub('[^A-Za-z0-9]+', '', (custom_index.lower().replace(".pdf","").split("/")[-1].split(".")[0]).lower())
-
-
-
-    # st.subheader(":blue[Retriever]")
-    # search_type = st.multiselect('Select the Retriever(s)',
-    #     ['Keyword Search',
-    #     'Vector Search',
-    #     'Sparse Search',
-    #     ],
-    #     ['Sparse Search'],
-
-    #     key = 'input_rag_searchType',
-    #     help = "Select the type of Search, adding more than one search type will activate hybrid search"#\n1. Conversational Search (Recommended) - This will include both the OpenSearch and LLM in the retrieval pipeline \n (note: This will put opensearch response as context to LLM to answer) \n2. OpenSearch vector search - This will put only OpenSearch's vector search in the pipeline, \n(Warning: this will lead to unformatted results )\n3. LLM Text Generation - This will include only LLM in the pipeline, \n(Warning: This will give hallucinated and out of context answers)"
-    #     )
-
-    # re_rank = st.checkbox('Re-rank results', key = 'input_re_rank', disabled = False, value = True, help = "Checking this box will re-rank the results using a cross-encoder model")
-
-    # if(re_rank):
-    #     st.session_state.input_is_rerank = True
-    # else:
-    #     st.session_state.input_is_rerank = False
-
-    # # copali_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'copali_rerank',help = "Enabling this option uses 'Copali' model's page level image embeddings to retrieve documents and MaxSim to re-rank the pages.\n\n Hugging Face Model: https://huggingface.co/vidore/colpali")
-
-    # # if(copali_rerank):
-    # #     st.session_state.input_copali_rerank = True
-    # # else:
-    # #     st.session_state.input_copali_rerank = False
-
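
One fragile line this commit keeps in render_answer() is src_dict = json.loads(answer['last_tool']['response'].replace("'",'"')), which breaks as soon as a value contains an apostrophe. A safer sketch, assuming the tool response is either valid JSON or a Python-literal string:

# Hedged sketch: parse a tool response without the quote-swapping hack.
import ast
import json

def parse_tool_response(raw):
    try:
        return json.loads(raw)          # already valid JSON
    except json.JSONDecodeError:
        return ast.literal_eval(raw)    # Python-style dict/list string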
 
pages/Semantic_Search.py CHANGED
@@ -24,24 +24,18 @@ import base64
 import shutil
 import re
 from requests.auth import HTTPBasicAuth
-#import utilities.re_ranker as re_ranker
 # from nltk.stem import PorterStemmer
 # from nltk.tokenize import word_tokenize
 import query_rewrite
 import amazon_rekognition
+
 #from st_click_detector import click_detector
 import llm_eval
 import all_search_execute
 import warnings
 
 warnings.filterwarnings("ignore", category=DeprecationWarning)
-
-
-
-
 st.set_page_config(
-    #page_title="Semantic Search using OpenSearch",
-    #layout="wide",
     page_icon="images/opensearch_mark_default.png"
 )
 parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])
@@ -58,11 +52,6 @@ st.markdown("""
 #ps = PorterStemmer()
 
 st.session_state.REGION = 'us-east-1'
-
-
-#from langchain.callbacks.base import BaseCallbackHandler
-
-
 USER_ICON = "images/user.png"
 AI_ICON = "images/opensearch-twitter-card.png"
 REGENERATE_ICON = "images/regenerate.png"
@@ -170,12 +159,6 @@ if "input_ndcg" not in st.session_state:
 if "gen_image_str" not in st.session_state:
     st.session_state.gen_image_str=""
 
-# if "input_searchType" not in st.session_state:
-#     st.session_state.input_searchType = ['Keyword Search']
-
-# if "input_must" not in st.session_state:
-#     st.session_state.input_must = ["Category","Price","Gender","Style"]
-
 if "input_NormType" not in st.session_state:
     st.session_state.input_NormType = "min_max"
@@ -261,25 +244,8 @@ if(search_all_type==True):
         'Multimodal Search',
         'NeuralSparse Search',
     ]
-    from streamlit.components.v1 import html
-    # with st.container():
-    #     html("""
-    #     <script>
-    #     // Locate elements
-    #     var decoration = window.parent.document.querySelectorAll('[data-testid="stDecoration"]')[0];
-    #     decoration.style.height = "3.0rem";
-    #     decoration.style.right = "45px";
-    #     // Adjust text decorations
-    #     decoration.innerText = "Semantic Search with OpenSearch!"; // Replace with your desired text
-    #     decoration.style.fontWeight = "bold";
-    #     decoration.style.display = "flex";
-    #     decoration.style.justifyContent = "center";
-    #     decoration.style.alignItems = "center";
-    #     decoration.style.fontWeight = "bold";
-    #     decoration.style.backgroundImage = url('/home/ubuntu/AI-search-with-amazon-opensearch-service/OpenSearchApp/images/service_logo.png'); // Remove background image
-    #     decoration.style.backgroundSize = "unset"; // Remove background size
-    #     </script>
-    #     """, width=0, height=0)
 
 
 
@@ -448,31 +414,12 @@ def handle_input():
 
 
     inputs = {}
-    # if(st.session_state.input_imageUpload == 'yes'):
-    #     st.session_state.input_searchType = 'Multi-modal Search'
-    # if(st.session_state.input_sparse == 'enabled' or st.session_state.input_is_rewrite_query == 'enabled'):
-    #     st.session_state.input_searchType = 'Keyword Search'
     if(st.session_state.input_imageUpload == 'yes' and 'Keyword Search' in st.session_state.input_searchType):
         old_rekog_label = st.session_state.input_rekog_label
         st.session_state.input_rekog_label = amazon_rekognition.extract_image_metadata(st.session_state.bytes_for_rekog)
         if(st.session_state.input_text == ""):
             st.session_state.input_text = st.session_state.input_rekog_label
 
-    # if(st.session_state.input_imageUpload == 'yes'):
-    #     if(st.session_state.input_searchType!='Multi-modal Search'):
-    #         if(st.session_state.input_searchType=='Keyword Search'):
-    #             if(st.session_state.input_rekognition != 'enabled'):
-    #                 st.error('For Keyword Search using images, enable "Enrich metadata for Images" in the left panel',icon = "🚨")
-    #                 #st.session_state.input_rekognition = 'enabled'
-    #                 st.switch_page('pages/1_Semantic_Search.py')
-    #                 #st.stop()
-
-    #     else:
-    #         st.error('Please set the search type as "Keyword Search (enabling Enrich metadata for Images) or Multi-modal Search"',icon = "🚨")
-    #         #st.session_state.input_searchType='Multi-modal Search'
-    #         st.switch_page('pages/1_Semantic_Search.py')
-    #         #st.stop()
-
 
     weightage = {}
     st.session_state.weights_ = []
@@ -511,44 +458,13 @@ def handle_input():
         else:
             weightage[original_key] = 0.0
             st.session_state[key] = 0.0
-
-
-
-
-
-
-
-
-
-
-
-
-
     inputs['weightage']=weightage
     st.session_state.input_weightage = weightage
 
-    print("====================")
-    print(st.session_state.weights_)
-    print(st.session_state.input_weightage )
-    print("====================")
-    #print("***************************")
-    #print(sum(weights_))
-    # if(sum(st.session_state.weights_)!=100):
-    #     st.warning('The total weight of selected search type(s) should be equal to 100',icon = "🚨")
-    #     refresh = st.button("Re-Enter")
-    #     if(refresh):
-    #         st.switch_page('pages/1_Semantic_Search.py')
-    #     st.stop()
-
-
-    #     #st.session_state.input_rekognition = 'enabled'
-    #     st.rerun()
-
-
 
     st.session_state.inputs_ = inputs
 
-    #st.write(inputs)
     question_with_id = {
         'question': inputs["text"],
         'id': len(st.session_state.questions)
@@ -567,19 +483,15 @@ def handle_input():
 
     if(st.session_state.input_is_rewrite_query == 'enabled' or (st.session_state.input_imageUpload == 'yes' and 'Keyword Search' in st.session_state.input_searchType)):
         query_rewrite.get_new_query_res(st.session_state.input_text)
-        print("-------------------")
-        print(st.session_state.input_rewritten_query)
-        print("-------------------")
     else:
         st.session_state.input_rewritten_query = ""
 
-    # elif(st.session_state.input_rekog_label!="" and st.session_state.input_rekognition == 'enabled'):
-    #     ans__ = amazon_rekognition.call(st.session_state.input_text,st.session_state.input_rekog_label)
-    # else:
     ans__ = all_search_execute.handler(inputs, st.session_state['session_id'])
 
     st.session_state.answers.append({
-        'answer': ans__,#all_search_api.call(json.dumps(inputs), st.session_state['session_id']),
+        'answer': ans__,
         'search_type':inputs['searchType'],
         'id': len(st.session_state.questions)
     })
@@ -587,21 +499,8 @@ def handle_input():
     st.session_state.answers_none_rank = st.session_state.answers
     if(st.session_state.input_evaluate == "enabled"):
         llm_eval.eval(st.session_state.questions, st.session_state.answers)
-    #st.session_state.input_text=""
-    #st.session_state.input_searchType=st.session_state.input_searchType
-
 def write_top_bar():
-    # st.markdown("""
-    #     <style>
-    #     [data-testid=column]:nth-of-type(1) [data-testid=stVerticalBlock]{
-    #         gap: 0rem;
-    #     }
-    #     </style>
-    #     """,unsafe_allow_html=True)
-    #print("top bar")
-    # st.title(':mag: AI powered OpenSearch')
-    # st.write("")
-    # st.write("")
     col1, col2,col3,col4 = st.columns([2.5,35,8,7])
     with col1:
         st.image(TEXT_ICON, use_column_width='always')
@@ -630,9 +529,6 @@ def write_top_bar():
             st.markdown("<div style = 'height:43px'></div>",unsafe_allow_html=True)
             st.button("Generate",disabled=False,key = "generate",on_click = generate_images, args=(tab1,"default_img"))
 
-        # image_select = st.select_slider(
-        #     "Select a image",
-        #     options=["Image 1","Image 2","Image 3"], value = None, disabled = st.session_state.radio_disabled,key = "image_select")
         image_select = st.radio("Choose one image", ["Image 1","Image 2","Image 3"],index=None, horizontal = True,key = 'image_select',disabled = st.session_state.radio_disabled)
         st.markdown("""
             <style>
@@ -642,25 +538,10 @@ def write_top_bar():
            </style>
            """,unsafe_allow_html=True)
        if(st.session_state.image_select is not None and st.session_state.image_select !="" and len(st.session_state.img_gen)!=0):
-            print("image_select")
-            print("------------")
-            print(st.session_state.image_select)
            st.session_state.input_rad_1 = st.session_state.image_select.split(" ")[1]
        else:
            st.session_state.input_rad_1 = ""
-        # rad1, rad2,rad3 = st.columns([33,33,33])
-        # with rad1:
-        #     btn1 = st.button("choose image 1", disabled = st.session_state.radio_disabled)
-        # with rad2:
-        #     btn2 = st.button("choose image 2", disabled = st.session_state.radio_disabled)
-        # with rad3:
-        #     btn3 = st.button("choose image 3", disabled = st.session_state.radio_disabled)
-        # if(btn1):
-        #     st.session_state.input_rad_1 = "1"
-        # if(btn2):
-        #     st.session_state.input_rad_1 = "2"
-        # if(btn3):
-        #     st.session_state.input_rad_1 = "3"
 
 
        generate_images(tab1,gen_images)
@@ -669,19 +550,11 @@ def write_top_bar():
 
     with tab2:
         st.session_state.img_doc = st.file_uploader(
            "Upload image", accept_multiple_files=False,type = ['png', 'jpg'])
-
-
-
-
-
     return clear,tab1
 
 clear,tab_ = write_top_bar()
 
 if clear:
-
-
-    print("clear1")
    st.session_state.questions = []
    st.session_state.answers = []
@@ -697,18 +570,7 @@ if clear:
     st.session_state.input_rad_1 = ""
 
 
-    # placeholder1 = st.empty()
-    # with placeholder1.container():
-    #     generate_images(tab_,st.session_state.image_prompt)
-
-
-    #st.session_state.input_text=""
-    # st.session_state.input_searchType="Conversational Search (RAG)"
-    # st.session_state.input_temperature = "0.001"
-    # st.session_state.input_topK = 200
-    # st.session_state.input_topP = 0.95
-    # st.session_state.input_maxTokens = 1024
-
 col1, col3, col4 = st.columns([70,18,12])
 
 with col1:
@@ -732,7 +594,7 @@ with col4:
     evaluate = st.toggle(' ', key = 'evaluate', disabled = False) #help = "Checking this box will use LLM to evaluate results as relevant and irrelevant. \n\n This option increases the latency")
     if(evaluate):
         st.session_state.input_evaluate = "enabled"
-        #llm_eval.eval(st.session_state.questions, st.session_state.answers)
+
     else:
         st.session_state.input_evaluate = "disabled"
@@ -740,11 +602,7 @@ with col4:
 if(search_all_type == True or 1==1):
     with st.sidebar:
         st.page_link("app.py", label=":orange[Home]", icon="🏠")
-        #st.image('/home/ubuntu/AI-search-with-amazon-opensearch-service/OpenSearchApp/images/service_logo.png', width = 300)
-        #st.warning('Note: After changing any of the below settings, click "SEARCH" button or 🔄 to apply the changes', icon="⚠️")
-        #st.header(' :gear: :orange[Fine-tune Search]')
-        #st.write("Note: After changing any of the below settings, click 'SEARCH' button or '🔄' to apply the changes")
-        #st.subheader(':blue[Keyword Search]')
 
         ########################## enable for query_rewrite ########################
         rewrite_query = st.checkbox('Auto-apply filters', key = 'query_rewrite', disabled = False, help = "Checking this box will use LLM to rewrite your query. \n\n Here your natural language query is transformed into OpenSearch query with added filters and attributes")
@@ -754,6 +612,8 @@ if(search_all_type == True or 1==1):
             key = 'input_must',
         )
         ########################## enable for query_rewrite ########################
+
+
         ####### Filters #########
 
         st.subheader(':blue[Filters]')
@@ -776,25 +636,6 @@ if(search_all_type == True or 1==1):
 
 
         clear_filter = st.button("Clear Filters",on_click=clear_filter)
-
-
-        # filter_place_holder = st.container()
-        # with filter_place_holder:
-        #     st.selectbox("Select one Category", ("accessories", "books","floral","furniture","hot_dispensed","jewelry","tools","apparel","cold_dispensed","food_service","groceries","housewares","outdoors","salty_snacks","videos","beauty","electronics","footwear","homedecor","instruments","seasonal"),index = None,key = "input_category")
-        #     st.selectbox("Select one Gender", ("male","female"),index = None,key = "input_gender")
-        #     st.slider("Select a range of price", 0, 2000, (0, 0),50, key = "input_price")
-
-        # st.session_state.input_category=None
-        # st.session_state.input_gender=None
-        # st.session_state.input_price=(0,0)
-
-        print("--------------------filters---------------")
-        print(st.session_state.input_gender)
-        print(st.session_state.input_manual_filter)
-        print("--------------------filters---------------")
-
-
-
         ####### Filters #########
 
         if('NeuralSparse Search' in st.session_state.search_types):
@@ -802,111 +643,21 @@ if(search_all_type == True or 1==1):
             sparse_filter = st.slider('Keep only sparse tokens with weight >=', 0.0, 1.0, 0.5,0.1,key = 'input_sparse_filter', help = 'Use this slider to set the minimum weight that the sparse vector token weights should meet, rest are filtered out')
 
 
-        #sql_query = st.checkbox('Re-write as SQL query', key = 'sql_rewrite', disabled = True, help = "In Progress")
         st.session_state.input_is_rewrite_query = 'disabled'
         st.session_state.input_is_sql_query = 'disabled'
 
         ########################## enable for query_rewrite ########################
         if rewrite_query:
-            #st.write(st.session_state.inputs_)
            st.session_state.input_is_rewrite_query = 'enabled'
-        # if sql_query:
-        #     #st.write(st.session_state.inputs_)
-        #     st.session_state.input_is_sql_query = 'enabled'
-        ########################## enable for sql conversion ########################
-
-
-        #st.markdown('---')
-        #st.header('Fine-tune keyword Search', divider='rainbow')
-        #st.subheader('Note: The below selection applies only when the Search type is set to Keyword Search')
-
-
-        # st.markdown("<u>Enrich metadata for :</u>",unsafe_allow_html=True)
-
-
-
-        # c3,c4 = st.columns([10,90])
-        # with c4:
-        #     rekognition = st.checkbox('Images', key = 'rekognition', help = "Checking this box will use AI to extract metadata for images that are present in query and documents")
-        #     if rekognition:
-        #         #st.write(st.session_state.inputs_)
-        #         st.session_state.input_rekognition = 'enabled'
-        #     else:
-        #         st.session_state.input_rekognition = "disabled"
-
-        #st.markdown('---')
-        #st.header('Fine-tune Hybrid Search', divider='rainbow')
-        #st.subheader('Note: The below parameters apply only when the Search type is set to Hybrid Search')
-
-
-
-
-
-
-
-        #st.write("---")
-        #if(st.session_state.max_selections == "None"):
        st.subheader(':blue[Hybrid Search]')
-        # st.selectbox('Select the Hybrid Search type',
-        #     ("OpenSearch Hybrid Query","Reciprocal Rank Fusion"),key = 'input_hybridType')
-        # equal_weight = st.button("Give equal weights to selected searches")
-
-
-
-
-
-
-        #st.warning('Weight of each of the selected search type should be greater than 0 and the total weight of all the selected search type(s) should be equal to 100',icon = "⚠️")
-
-
-        #st.markdown("<p style = 'font-size:14.5px;font-style:italic;'>Set Weights</p>",unsafe_allow_html=True)
-
        with st.expander("Set query Weightage:"):
            st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
            st.number_input("Vector %", min_value=0, max_value=100, value=0, step=5, key='input_Vector-weight', help=None)
            st.number_input("Multimodal %", min_value=0, max_value=100, value=0, step=5, key='input_Multimodal-weight', help=None)
            st.number_input("NeuralSparse %", min_value=0, max_value=100, value=0, step=5, key='input_NeuralSparse-weight', help=None)
 
-        # if(equal_weight):
-        #     counter = 0
-        #     num_search = len(st.session_state.input_searchType)
-        #     weight_type = ["input_Keyword-weight","input_Vector-weight","input_Multimodal-weight","input_NeuralSparse-weight"]
-        #     for type in weight_type:
-        #         if(type.split("-")[0].replace("input_","")+ " Search" in st.session_state.input_searchType):
-        #             print("ssssssssssss")
-        #             counter = counter +1
-        #     extra_weight = 100%num_search
-        #     if(counter == num_search):
-        #         cal_weight = math.trunc(100/num_search)+extra_weight
-        #     else:
-        #         cal_weight = math.trunc(100/num_search)
-        #     st.session_state[weight_type] = cal_weight
-        #     else:
-        #         st.session_state[weight_type] = 0
-        #weight = st.slider('Weight for Vector Search', 0.0, 1.0, 0.5,0.1,key = 'input_weight', help = 'Use this slider to set the weightage for keyword and vector search, higher values of the slider indicate the increased weightage for semantic search.\n\n This applies only when the search type is set to Hybrid Search')
-        # st.selectbox('Select the Normalisation type',
-        #     ('min_max',
-        #     'l2'
-        #     ),
-        #st.write("---")
-        #     key = 'input_NormType',
-        #     disabled = True,
-        #     help = "Select the type of Normalisation to be applied on the two sets of scores"
-        #     )
-
-        # st.selectbox('Select the Score Combination type',
-        #     ('arithmetic_mean','geometric_mean','harmonic_mean'
-        #     ),
-
-        #     key = 'input_CombineType',
-        #     disabled = True,
-        #     help = "Select the Combination strategy to be used while combining the two scores of the two search queries for every document"
-        #     )
-
-        #st.markdown('---')
-
-        #st.header('Select the ML Model for text embedding', divider='rainbow')
-        #st.subheader('Note: The below selection applies only when the Search type is set to Vector or Hybrid Search')
        if(st.session_state.re_ranker == "true"):
            st.subheader(':blue[Re-ranking]')
            reranker = st.selectbox('Choose a Re-Ranker',
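
The weightage expander above is where the hybrid percentages come from; downstream they normally become normalized weights in an OpenSearch hybrid-search pipeline. A sketch under that assumption (normalization-processor with min_max and arithmetic_mean matches the options named in the commented-out selectboxes; the exact wiring inside all_search_execute.py is not shown in this commit):

# Hedged sketch: convert UI percentages into a search-pipeline definition.
weightage = {'Keyword-weight': 70, 'Vector-weight': 30,
             'Multimodal-weight': 0, 'NeuralSparse-weight': 0}
weights = [round(v / 100.0, 2) for v in weightage.values() if v > 0]
search_pipeline = {
    "phase_results_processors": [{
        "normalization-processor": {
            "normalization": {"technique": "min_max"},
            "combination": {
                "technique": "arithmetic_mean",
                "parameters": {"weights": weights},
            },
        }
    }]
}
# os_client.transport.perform_request("PUT", "/_search/pipeline/hybrid_pipeline",
#                                     body=search_pipeline)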
@@ -916,41 +667,19 @@ if(search_all_type == True or 1==1):
916
 
917
  key = 'input_reranker',
918
  help = 'Select the Re-Ranker type, select "None" to apply no re-ranking of the results',
919
- #on_change = re_ranker.re_rank,
920
  args=(st.session_state.questions, st.session_state.answers)
921
 
922
  )
- # st.write("---")
- # st.subheader('Text Embeddings Model')
- # model_type = st.selectbox('Select the Text Embeddings Model',
- #     ('Titan-Embed-Text-v1','GPT-J-6B'
- #     ),
- #     key = 'input_modelType',
- #     help = "Select the Text embedding model, this applies only for the vector and hybrid search"
- #     )
-
- #st.markdown('---')
-
- #st.markdown('---')

 def write_user_message(md,ans):
-    #print(ans)
     ans = ans["answer"][0]
     col1, col2, col3 = st.columns([3,40,20])

     with col1:
         st.image(USER_ICON, use_column_width='always')
     with col2:
-        #st.warning(md['question'])
         st.markdown("<div style='fontSize:15px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'>Input Text: </div><div style='fontSize:25px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;font-style: italic;color:#e28743'>"+md['question']+"</div>", unsafe_allow_html = True)
         if('query_sparse' in ans):
             with st.expander("Expanded Query:"):
@@ -1011,10 +740,7 @@ def render_answer(answer,index):
         span_color = "red"
     st.markdown("<span style='fontSize:20px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 20px;font-family:Courier New;color:#e28743'>Relevance:" +str('%.3f'%(st.session_state.input_ndcg)) + "</span><span style='font-size:30px;font-weight:bold;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[0] +"</span><span style='font-size:15px;font-weight:bold;font-family:Courier New;color:"+span_color+"'> "+st.session_state.ndcg_increase.split("~")[1]+"</span>", unsafe_allow_html = True)

-
-    #st.markdown("<span style='font-size:30px;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[0] +"</span><span style='font-size:15px;font-family:Courier New;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[1]+"</span>",unsafe_allow_html = True)
-

 placeholder_no_results = st.empty()
@@ -1030,12 +756,7 @@ def render_answer(answer,index):
             continue

-        # imgdata = base64.b64decode(ans['image_binary'])
         format_ = ans['image_url'].split(".")[-1]
-
-        #urllib.request.urlretrieve(ans['image_url'], "/home/ubuntu/res_images/"+str(i)+"_."+format_)
-
         Image.MAX_IMAGE_PIXELS = 100000000

         width = 500
@@ -1066,23 +787,6 @@ def render_answer(answer,index):
         desc__ = ans['desc'].split(" ")

         final_desc = "<p>"
-
-        ###### stemming and highlighting
-
-        # ans_text = ans['desc']
-        # query_text = st.session_state.input_text
-
-        # ans_text_stemmed = set(stem_(ans_text))
-        # query_text_stemmed = set(stem_(query_text))
-
-        # common = ans_text_stemmed.intersection( query_text_stemmed)
-        # #unique = set(document_1_words).symmetric_difference( )
-
-        # desc__stemmed = stem_(desc__)
-
-        # for word_ in desc__stemmed:
-        #     if(word_ in common):
-

         for word in desc__:
             if(re.sub('[^A-Za-z0-9]+', '', word) in res__):
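The block deleted above sketched stem-based highlighting. A hedged reconstruction of that idea, using the nltk imports that stay commented out at the top of this file (the stem_ helper referenced there is assumed to have wrapped PorterStemmer roughly like this):

    import re
    from nltk.stem import PorterStemmer

    ps = PorterStemmer()

    def stem_(text):
        words = text.split(" ") if isinstance(text, str) else text
        return [ps.stem(re.sub('[^A-Za-z0-9]+', '', w).lower()) for w in words]

    def highlight(desc, query):
        common = set(stem_(desc)) & set(stem_(query))
        return " ".join(
            "<b>" + w + "</b>" if stem_([w])[0] in common else w
            for w in desc.split(" ")
        )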
@@ -1104,16 +808,8 @@ def render_answer(answer,index):
                 filtered_sparse[key] = round(sparse_[key], 2)
             st.write(filtered_sparse)
         with st.expander("Document Metadata:",expanded = False):
-            # if("rekog" in ans):
-            #     div_size = [50,50]
-            # else:
-            #     div_size = [99,1]
-            # div1,div2 = st.columns(div_size)
-            # with div1:
-
             st.write(":green[default:]")
             st.json({"category:":ans['category'],"price":str(ans['price']),"gender_affinity":ans['gender_affinity'],"style":ans['style']},expanded = True)
-            #with div2:
             if("rekog" in ans):
                 st.write(":green[enriched:]")
                 st.json(ans['rekog'],expanded = True)
@@ -1128,18 +824,7 @@ def render_answer(answer,index):
                 st.write(":x:")

             i = i+1
-        # with col_2:
-        #     if(st.session_state.input_evaluate == "enabled"):
-        #         st.markdown("<div style='fontSize:12px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;font-weight:bold;height: fit-content;border-radius: 20px;font-family:Courier New;color:#e28743'>DCG: " +str('%.3f'%(st.session_state.input_ndcg)) + "</div>", unsafe_allow_html = True)
-        # with col_2_b:
-        #     span_color = "white"
-        #     if("&uarr;" in st.session_state.ndcg_increase):
-        #         span_color = "green"
-        #     if("&darr;" in st.session_state.ndcg_increase):
-        #         span_color = "red"
-        #     st.markdown("<span style='font-size:30px;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[0] +"</span><span style='font-size:15px;font-family:Courier New;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[1]+"</span>",unsafe_allow_html = True)
-
         with col_3:
             if(index == len(st.session_state.questions)):

@@ -1155,7 +840,6 @@ def render_answer(answer,index):
         st.session_state.questions.pop()

         handle_input()
-        #re_ranker.re_rank(st.session_state.questions, st.session_state.answers)
         with placeholder.container():
             render_all()

@@ -1169,9 +853,6 @@ def render_answer(answer,index):
     except:
         pass

-    print("------------------------")
-    #print(st.session_state)
-
     placeholder__ = st.empty()

     placeholder__.button("🔄",key=rdn_key,on_click=on_button_click, help = "This will regenerate the responses with new settings that you entered, Note: To see difference in responses, you should change any of the applicable settings")#,type="primary",use_column_width=True)
@@ -1196,8 +877,6 @@ def render_all():
     index = 0
     for (q, a) in zip(st.session_state.questions, st.session_state.answers):
         index = index +1
-        #print("answers----")
-        #print(a)
         ans_ = st.session_state.answers[0]
         write_user_message(q,ans_)
         write_chat_message(a, q,index)
@@ -1206,6 +885,4 @@ placeholder = st.empty()
 with placeholder.container():
     render_all()

- #generate_images("",st.session_state.image_prompt)
-
 st.markdown("")
 
 import shutil
 import re
 from requests.auth import HTTPBasicAuth
 # from nltk.stem import PorterStemmer
 # from nltk.tokenize import word_tokenize
 import query_rewrite
 import amazon_rekognition
+ from streamlit.components.v1 import html
 #from st_click_detector import click_detector
 import llm_eval
 import all_search_execute
 import warnings

 warnings.filterwarnings("ignore", category=DeprecationWarning)

 st.set_page_config(
     page_icon="images/opensearch_mark_default.png"
 )
 parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])

 #ps = PorterStemmer()

 st.session_state.REGION = 'us-east-1'

 USER_ICON = "images/user.png"
 AI_ICON = "images/opensearch-twitter-card.png"
 REGENERATE_ICON = "images/regenerate.png"

 if "gen_image_str" not in st.session_state:
     st.session_state.gen_image_str=""

 if "input_NormType" not in st.session_state:
     st.session_state.input_NormType = "min_max"

     'Multimodal Search',
     'NeuralSparse Search',
     ]
+
+

 inputs = {}
 if(st.session_state.input_imageUpload == 'yes' and 'Keyword Search' in st.session_state.input_searchType):
     old_rekog_label = st.session_state.input_rekog_label
     st.session_state.input_rekog_label = amazon_rekognition.extract_image_metadata(st.session_state.bytes_for_rekog)
     if(st.session_state.input_text == ""):
         st.session_state.input_text = st.session_state.input_rekog_label

 weightage = {}
 st.session_state.weights_ = []

 else:
     weightage[original_key] = 0.0
     st.session_state[key] = 0.0
+

 inputs['weightage']=weightage
 st.session_state.input_weightage = weightage

 st.session_state.inputs_ = inputs

 question_with_id = {
     'question': inputs["text"],
     'id': len(st.session_state.questions)

 if(st.session_state.input_is_rewrite_query == 'enabled' or (st.session_state.input_imageUpload == 'yes' and 'Keyword Search' in st.session_state.input_searchType)):
     query_rewrite.get_new_query_res(st.session_state.input_text)
+
 else:
     st.session_state.input_rewritten_query = ""
+
 ans__ = all_search_execute.handler(inputs, st.session_state['session_id'])

 st.session_state.answers.append({
+    'answer': ans__,
     'search_type':inputs['searchType'],
     'id': len(st.session_state.questions)
 })

 st.session_state.answers_none_rank = st.session_state.answers
 if(st.session_state.input_evaluate == "enabled"):
     llm_eval.eval(st.session_state.questions, st.session_state.answers)
+

 def write_top_bar():
     col1, col2,col3,col4 = st.columns([2.5,35,8,7])
     with col1:
         st.image(TEXT_ICON, use_column_width='always')

     st.markdown("<div style = 'height:43px'></div>",unsafe_allow_html=True)
     st.button("Generate",disabled=False,key = "generate",on_click = generate_images, args=(tab1,"default_img"))

     image_select = st.radio("Choose one image", ["Image 1","Image 2","Image 3"],index=None, horizontal = True,key = 'image_select',disabled = st.session_state.radio_disabled)
     st.markdown("""
     <style>
     </style>
     """,unsafe_allow_html=True)
     if(st.session_state.image_select is not None and st.session_state.image_select !="" and len(st.session_state.img_gen)!=0):
         st.session_state.input_rad_1 = st.session_state.image_select.split(" ")[1]
     else:
         st.session_state.input_rad_1 = ""
+

     generate_images(tab1,gen_images)

     with tab2:
         st.session_state.img_doc = st.file_uploader(
             "Upload image", accept_multiple_files=False,type = ['png', 'jpg'])

     return clear,tab1

 clear,tab_ = write_top_bar()

 if clear:
     st.session_state.questions = []
     st.session_state.answers = []

     st.session_state.input_rad_1 = ""

+
 col1, col3, col4 = st.columns([70,18,12])

 with col1:

 evaluate = st.toggle(' ', key = 'evaluate', disabled = False) #help = "Checking this box will use LLM to evaluate results as relevant and irrelevant. \n\n This option increases the latency")
 if(evaluate):
     st.session_state.input_evaluate = "enabled"
+
 else:
     st.session_state.input_evaluate = "disabled"

 if(search_all_type == True or 1==1):
     with st.sidebar:
         st.page_link("app.py", label=":orange[Home]", icon="🏠")
+

         ########################## enable for query_rewrite ########################
         rewrite_query = st.checkbox('Auto-apply filters', key = 'query_rewrite', disabled = False, help = "Checking this box will use LLM to rewrite your query. \n\n Here your natural language query is transformed into OpenSearch query with added filters and attributes")

             key = 'input_must',
         )
         ########################## enable for query_rewrite ########################
+
+
         ####### Filters #########

         st.subheader(':blue[Filters]')

         clear_filter = st.button("Clear Filters",on_click=clear_filter)

         ####### Filters #########

         if('NeuralSparse Search' in st.session_state.search_types):
             sparse_filter = st.slider('Keep only sparse tokens with weight >=', 0.0, 1.0, 0.5,0.1,key = 'input_sparse_filter', help = 'Use this slider to set the minimum weight that the sparse vector token weights should meet, rest are filtered out')
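A small sketch of what this threshold does downstream, assuming the answer's sparse vector is a token-to-weight dict as in the render path below:

    import streamlit as st

    def filter_sparse(sparse_vector):
        cutoff = st.session_state.get("input_sparse_filter", 0.5)
        return {tok: round(w, 2) for tok, w in sparse_vector.items() if w >= cutoff}

    # filter_sparse({"shoe": 1.42, "red": 0.61, "the": 0.07}) -> {"shoe": 1.42, "red": 0.61}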

         st.session_state.input_is_rewrite_query = 'disabled'
         st.session_state.input_is_sql_query = 'disabled'

         ########################## enable for query_rewrite ########################
         if rewrite_query:
             st.session_state.input_is_rewrite_query = 'enabled'
+

         st.subheader(':blue[Hybrid Search]')
         with st.expander("Set query Weightage:"):
             st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
             st.number_input("Vector %", min_value=0, max_value=100, value=0, step=5, key='input_Vector-weight', help=None)
             st.number_input("Multimodal %", min_value=0, max_value=100, value=0, step=5, key='input_Multimodal-weight', help=None)
             st.number_input("NeuralSparse %", min_value=0, max_value=100, value=0, step=5, key='input_NeuralSparse-weight', help=None)
+

         if(st.session_state.re_ranker == "true"):
             st.subheader(':blue[Re-ranking]')
             reranker = st.selectbox('Choose a Re-Ranker',

                 key = 'input_reranker',
                 help = 'Select the Re-Ranker type, select "None" to apply no re-ranking of the results',
                 args=(st.session_state.questions, st.session_state.answers)

             )
+

 def write_user_message(md,ans):
     ans = ans["answer"][0]
     col1, col2, col3 = st.columns([3,40,20])

     with col1:
         st.image(USER_ICON, use_column_width='always')
     with col2:
         st.markdown("<div style='fontSize:15px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;'>Input Text: </div><div style='fontSize:25px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 10px;font-style: italic;color:#e28743'>"+md['question']+"</div>", unsafe_allow_html = True)
         if('query_sparse' in ans):
             with st.expander("Expanded Query:"):

         span_color = "red"
     st.markdown("<span style='fontSize:20px;padding:3px 7px 3px 7px;borderWidth: 0px;borderColor: red;borderStyle: solid;width: fit-content;height: fit-content;border-radius: 20px;font-family:Courier New;color:#e28743'>Relevance:" +str('%.3f'%(st.session_state.input_ndcg)) + "</span><span style='font-size:30px;font-weight:bold;color:"+span_color+"'>"+st.session_state.ndcg_increase.split("~")[0] +"</span><span style='font-size:15px;font-weight:bold;font-family:Courier New;color:"+span_color+"'> "+st.session_state.ndcg_increase.split("~")[1]+"</span>", unsafe_allow_html = True)
+

     placeholder_no_results = st.empty()

             continue

         format_ = ans['image_url'].split(".")[-1]
         Image.MAX_IMAGE_PIXELS = 100000000

         width = 500

         desc__ = ans['desc'].split(" ")

         final_desc = "<p>"

         for word in desc__:
             if(re.sub('[^A-Za-z0-9]+', '', word) in res__):

                 filtered_sparse[key] = round(sparse_[key], 2)
             st.write(filtered_sparse)
         with st.expander("Document Metadata:",expanded = False):
             st.write(":green[default:]")
             st.json({"category:":ans['category'],"price":str(ans['price']),"gender_affinity":ans['gender_affinity'],"style":ans['style']},expanded = True)
             if("rekog" in ans):
                 st.write(":green[enriched:]")
                 st.json(ans['rekog'],expanded = True)

                 st.write(":x:")

             i = i+1
+

         with col_3:
             if(index == len(st.session_state.questions)):

         st.session_state.questions.pop()

         handle_input()
         with placeholder.container():
             render_all()

     except:
         pass

     placeholder__ = st.empty()

     placeholder__.button("🔄",key=rdn_key,on_click=on_button_click, help = "This will regenerate the responses with new settings that you entered, Note: To see difference in responses, you should change any of the applicable settings")#,type="primary",use_column_width=True)

     index = 0
     for (q, a) in zip(st.session_state.questions, st.session_state.answers):
         index = index +1
         ans_ = st.session_state.answers[0]
         write_user_message(q,ans_)
         write_chat_message(a, q,index)

 with placeholder.container():
     render_all()

 st.markdown("")
semantic_search/amazon_rekognition.py CHANGED
@@ -24,12 +24,7 @@ def extract_image_metadata(img):
         MaxLabels = 10,
         MinConfidence = 80.0,
         Settings = {
-        # "GeneralLabels": {
-        #     "LabelCategoryExclusionFilters": [ "string" ],
-        #     "LabelCategoryInclusionFilters": [ "string" ],
-        #     "LabelExclusionFilters": [ "string" ],
-        #     "LabelInclusionFilters": [ "string" ]
-        # },
         "ImageProperties": {
             "MaxDominantColors": 5
         }
@@ -76,20 +71,12 @@ def extract_image_metadata(img):
     objects = " ".join(set(objects))
     categories = " ".join(set(categories))
     colors = " ".join(set(colors))
-
-    print("^^^^^^^^^^^^^^^^^^")
-    print(colors+ " " + objects + " " + categories)
-
     return colors+ " " + objects + " " + categories

 def call(a,b):
-    print("'''''''''''''''''''''''")
-    print(b)
-
     if(st.session_state.input_is_rewrite_query == 'enabled' and st.session_state.input_rewritten_query!=""):

-        #st.session_state.input_rewritten_query['query']['bool']['should'].pop()
         st.session_state.input_rewritten_query['query']['bool']['should'].append( {
             "simple_query_string": {

@@ -112,36 +99,4 @@ def call(a,b):
     }
     st.session_state.input_rewritten_query = rekog_query

-    # response = aos_client.search(
-    #     body = rekog_query,
-    #     index = 'demo-retail-rekognition'
-    #     #pipeline = 'RAG-Search-Pipeline'
-    # )
-
-    # hits = response['hits']['hits']
-    # print("rewrite-------------------------")
-    # arr = []
-    # for doc in hits:
-    #     # if('b5/b5319e00' in doc['_source']['image_s3_url'] ):
-    #     #     filter_out +=1
-    #     #     continue
-
-    #     res_ = {"desc":doc['_source']['text'].replace(doc['_source']['metadata']['rekog_all']," ^^^ " +doc['_source']['metadata']['rekog_all']),
-    #         "image_url":doc['_source']['metadata']['image_s3_url']}
-    #     if('highlight' in doc):
-    #         res_['highlight'] = doc['highlight']['text']
-    #     # if('caption_embedding' in doc['_source']):
-    #     #     res_['sparse'] = doc['_source']['caption_embedding']
-    #     # if('query_sparse' in response_ and len(arr) ==0 ):
-    #     #     res_['query_sparse'] = response_["query_sparse"]
-    #     res_['id'] = doc['_id']
-    #     res_['score'] = doc['_score']
-    #     res_['title'] = doc['_source']['text']
-    #     res_['rekog'] = {'color':doc['_source']['metadata']['rekog_color'],'category': doc['_source']['metadata']['rekog_categories'],'objects':doc['_source']['metadata']['rekog_objects']}
-
-    #     arr.append(res_)
-
-    # return arr
 
         MaxLabels = 10,
         MinConfidence = 80.0,
         Settings = {
+
         "ImageProperties": {
             "MaxDominantColors": 5
         }

     objects = " ".join(set(objects))
     categories = " ".join(set(categories))
     colors = " ".join(set(colors))
     return colors+ " " + objects + " " + categories

 def call(a,b):
     if(st.session_state.input_is_rewrite_query == 'enabled' and st.session_state.input_rewritten_query!=""):

         st.session_state.input_rewritten_query['query']['bool']['should'].append( {
             "simple_query_string": {

     }
     st.session_state.input_rewritten_query = rekog_query
+
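For reference, a hedged sketch of the detect_labels request this module builds and the objects/categories/colors join it returns; the field names follow the public Rekognition API, and the helper name is illustrative:

    import boto3

    rekognition = boto3.client("rekognition", region_name="us-east-1")

    def extract_metadata(image_bytes):
        resp = rekognition.detect_labels(
            Image={"Bytes": image_bytes},
            MaxLabels=10,
            MinConfidence=80.0,
            Features=["GENERAL_LABELS", "IMAGE_PROPERTIES"],
            Settings={"ImageProperties": {"MaxDominantColors": 5}},
        )
        objects = {label["Name"] for label in resp["Labels"]}
        categories = {c["Name"] for label in resp["Labels"]
                      for c in label.get("Categories", [])}
        colors = {c["SimplifiedColor"]
                  for c in resp.get("ImageProperties", {}).get("DominantColors", [])}
        return " ".join(colors | objects | categories)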
utilities/invoke_models.py CHANGED
@@ -24,17 +24,6 @@ bedrock_runtime_client = get_bedrock_client()

- # def generate_image_captions_ml():
- #     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
- #     feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
- #     tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
-
- #     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- #     model.to(device)
- #     max_length = 16
- #     num_beams = 4
- #     gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
-
 def invoke_model(input):
     response = bedrock_runtime_client.invoke_model(
         body=json.dumps({
@@ -100,56 +89,7 @@ def invoke_llm_model(input,is_stream):

     return (json.loads(res))['content'][0]['text']

-    # response = bedrock_runtime_client.invoke_model_with_response_stream(
-    #     body=json.dumps({
-    #         "prompt": input,
-    #         "max_tokens_to_sample": 300,
-    #         "temperature": 0.5,
-    #         "top_k": 250,
-    #         "top_p": 1,
-    #         "stop_sequences": [
-    #             "\n\nHuman:"
-    #         ],
-    #         # "anthropic_version": "bedrock-2023-05-31"
-    #     }),
-    #     modelId="anthropic.claude-v2:1",
-    #     accept="application/json",
-    #     contentType="application/json",
-    # )
-    # stream = response.get('body')
-
-    # return stream
-
-    # else:
-    #     response = bedrock_runtime_client.invoke_model_with_response_stream(
-    #         modelId= "anthropic.claude-3-sonnet-20240229-v1:0",
-    #         contentType = "application/json",
-    #         accept = "application/json",
-
-    #         body = json.dumps({
-    #             "anthropic_version": "bedrock-2023-05-31",
-    #             "max_tokens": 1024,
-    #             "temperature": 0.0001,
-    #             "top_k": 150,
-    #             "top_p": 0.7,
-    #             "stop_sequences": [
-    #                 "\n\nHuman:"
-    #             ],
-    #             "messages": [
-    #                 {
-    #                     "role": "user",
-    #                     "content":input
-    #                 }
-    #             ]
-    #         }
-
-    #     )
-    #     )
-
-    #     stream = response.get('body')
-
-    #     return stream
-
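What the deleted branch did, as a minimal sketch: Bedrock's invoke_model_with_response_stream returns an event stream whose 'chunk' events carry incremental bytes (model id and request body are left to the caller):

    import json

    def stream_llm(bedrock_runtime_client, model_id, body):
        response = bedrock_runtime_client.invoke_model_with_response_stream(
            modelId=model_id,
            accept="application/json",
            contentType="application/json",
            body=json.dumps(body),
        )
        for event in response.get("body"):      # EventStream of dicts
            if "chunk" in event:
                yield event["chunk"]["bytes"].decode("utf8")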
 def read_from_table(file,question):
     print("started table analysis:")
     print("-----------------------")
@@ -175,7 +115,6 @@ def read_from_table(file,question):
         df = pd.read_csv(file,skipinitialspace = True, on_bad_lines='skip',delimiter = "`")
     else:
         df = file
-    #df.fillna(method='pad', inplace=True)
     agent = create_pandas_dataframe_agent(
         model,
         df,
@@ -188,24 +127,7 @@ def read_from_table(file,question):

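A hedged usage sketch for the agent constructed above; the import paths and the allow_dangerous_code flag follow current LangChain releases and may differ from the versions this repo pins:

    import pandas as pd
    from langchain_aws import ChatBedrock                       # assumed wrapper
    from langchain_experimental.agents import create_pandas_dataframe_agent

    model = ChatBedrock(model_id="anthropic.claude-3-haiku-20240307-v1:0")
    df = pd.DataFrame({"price": [12.5, 99.0], "category": ["shoes", "bags"]})
    agent = create_pandas_dataframe_agent(
        model, df, verbose=True,
        allow_dangerous_code=True,  # required by newer langchain_experimental
    )
    print(agent.invoke("What is the average price?"))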
 def generate_image_captions_llm(base64_string,question):

-    # ant_client = Anthropic()
-    # MODEL_NAME = "claude-3-opus-20240229"
-
-    # message_list = [
-    #     {
-    #         "role": 'user',
-    #         "content": [
-    #             {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": base64_string}},
-    #             {"type": "text", "text": "What is in the image ?"}
-    #         ]
-    #     }
-    # ]
-
-    # response = ant_client.messages.create(
-    #     model=MODEL_NAME,
-    #     max_tokens=2048,
-    #     messages=message_list
-    # )
     response = bedrock_runtime_client.invoke_model(
         modelId= "anthropic.claude-3-haiku-20240307-v1:0",
         contentType = "application/json",
@@ -234,9 +156,5 @@ def generate_image_captions_llm(base64_string,question):
             }
         ]
     }))
-    #print(response)
     response_body = json.loads(response.get("body").read())['content'][0]['text']
-
-    #print(response_body)
-
     return response_body
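Shown standalone, the request body this function sends; the shape follows the Bedrock Anthropic messages API, with max_tokens and the media type as assumptions:

    import json

    def caption_request(base64_string, question):
        return json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1024,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "image",
                     "source": {"type": "base64", "media_type": "image/jpeg",
                                "data": base64_string}},
                    {"type": "text", "text": question},
                ],
            }],
        })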
 

 def invoke_model(input):
     response = bedrock_runtime_client.invoke_model(
         body=json.dumps({

     return (json.loads(res))['content'][0]['text']
+

 def read_from_table(file,question):
     print("started table analysis:")
     print("-----------------------")

         df = pd.read_csv(file,skipinitialspace = True, on_bad_lines='skip',delimiter = "`")
     else:
         df = file
     agent = create_pandas_dataframe_agent(
         model,
         df,

 def generate_image_captions_llm(base64_string,question):
+
     response = bedrock_runtime_client.invoke_model(
         modelId= "anthropic.claude-3-haiku-20240307-v1:0",
         contentType = "application/json",

             }
         ]
     }))
     response_body = json.loads(response.get("body").read())['content'][0]['text']

     return response_body
utilities/re_ranker.py DELETED
@@ -1,127 +0,0 @@
- import boto3
- from botocore.exceptions import ClientError
- import pprint
- import time
- import streamlit as st
- from sentence_transformers import CrossEncoder
-
- #model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
- ####### Add this Kendra Rescore ranking
- #kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
- #print("Create a rescore execution plan.")
-
- # Provide a name for the rescore execution plan
- #name = "MyRescoreExecutionPlan"
- # Set your required additional capacity units
- # Don't set capacity units if you don't require more than 1 unit given by default
- #capacity_units = 2
-
- # try:
- #     rescore_execution_plan_response = kendra_ranking.create_rescore_execution_plan(
- #         Name = name,
- #         CapacityUnits = {"RescoreCapacityUnits":capacity_units}
- #     )
-
- #     pprint.pprint(rescore_execution_plan_response)
-
- #     rescore_execution_plan_id = rescore_execution_plan_response["Id"]
-
- #     print("Wait for Amazon Kendra to create the rescore execution plan.")
-
- #     while True:
- #         # Get the details of the rescore execution plan, such as the status
- #         rescore_execution_plan_description = kendra_ranking.describe_rescore_execution_plan(
- #             Id = rescore_execution_plan_id
- #         )
- #         # When status is not CREATING quit.
- #         status = rescore_execution_plan_description["Status"]
- #         print(" Creating rescore execution plan. Status: "+status)
- #         time.sleep(60)
- #         if status != "CREATING":
- #             break
-
- # except ClientError as e:
- #     print("%s" % e)
-
- # print("Program ends.")
- #########################
-
- @st.cache_resource
- def re_rank(self_, rerank_type, search_type, question, answers):
-
-     ans = []
-     ids = []
-     ques_ans = []
-     query = question[0]['question']
-     for i in answers[0]['answer']:
-         if(self_ == "search"):
-
-             ans.append({
-                 "Id": i['id'],
-                 "Body": i["desc"],
-                 "OriginalScore": i['score'],
-                 "Title":i["desc"]
-             })
-             ids.append(i['id'])
-             ques_ans.append((query,i["desc"]))
-
-         else:
-             ans.append({'text':i})
-
-             ques_ans.append((query,i))
-
-     re_ranked = [{}]
-     ####### Add this Kendra Rescore ranking
-     # if(rerank_type == 'Kendra Rescore'):
-     #     rescore_response = kendra_ranking.rescore(
-     #         RescoreExecutionPlanId = 'b2a4d4f3-98ff-4e17-8b69-4c61ed7d91eb',
-     #         SearchQuery = query,
-     #         Documents = ans
-     #     )
-     #     re_ranked[0]['answer']=[]
-     #     for result in rescore_response["ResultItems"]:
-
-     #         pos_ = ids.index(result['DocumentId'])
-
-     #         re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
-     #     re_ranked[0]['search_type']=search_type,
-     #     re_ranked[0]['id'] = len(question)
-     #     return re_ranked
-
-     # if(rerank_type == 'Cross Encoder'):
-
-     #     scores = model.predict(
-     #         ques_ans
-     #     )
-
-     #     index__ = 0
-     #     for i in ans:
-     #         i['new_score'] = scores[index__]
-     #         index__ = index__+1
-
-     #     ans_sorted = sorted(ans, key=lambda d: d['new_score'],reverse=True)
-
-     #     def retreive_only_text(item):
-     #         return item['text']
-
-     #     if(self_ == 'rag'):
-     #         return list(map(retreive_only_text, ans_sorted))
-
-     #     re_ranked[0]['answer']=[]
-     #     for j in ans_sorted:
-     #         pos_ = ids.index(j['Id'])
-     #         re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
-     #     re_ranked[0]['search_type']= search_type,
-     #     re_ranked[0]['id'] = len(question)
-     #     return re_ranked
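The deleted module's only active path built (query, passage) pairs; its commented-out cross-encoder branch amounted to this sketch (same model id as the commented import above):

    from sentence_transformers import CrossEncoder

    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)

    def re_rank(query, passages):
        scores = model.predict([(query, p) for p in passages])
        ranked = sorted(zip(scores, passages), key=lambda t: t[0], reverse=True)
        return [p for _, p in ranked]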