OpenSearch-AI

Running on T4

App Files Files

prasadnu committed on May 19

Commit

475a4b0

1 Parent(s): 48ccdfb

rerank model

Browse files

Files changed (2) hide show

pages/Semantic_Search.py +66 -72
semantic_search/query_rewrite.py +3 -87

pages/Semantic_Search.py CHANGED Viewed

@@ -747,83 +747,77 @@ def render_answer(answer,index):
     col_1, col_2,col_3 = st.columns([70,10,20])
     i = 0
     filter_out = 0
-    for ans in answer:
-        if('b5/b5319e00' in ans['image_url'] ):
-            filter_out+=1
-            continue
-        format_ = ans['image_url'].split(".")[-1]
-        Image.MAX_IMAGE_PIXELS = 100000000
-        width = 500
-        height = 500
-        with col_1:
-            inner_col_1,inner_col_2 = st.columns([8,92])
-            with inner_col_2:
-                st.image(ans['image_url'].replace("/home/ec2-user/SageMaker/","/home/user/"))
-                if("highlight" in ans and 'Keyword Search' in st.session_state.input_searchType):
-                    test_strs = ans["highlight"]
-                    tag = "em"
-                    res__ = []
-                    for test_str in test_strs:
-                        start_idx = test_str.find("<" + tag + ">")
-                        while start_idx != -1:
-                            end_idx = test_str.find("</" + tag + ">", start_idx)
-                            if end_idx == -1:
-                                break
-                            res__.append(test_str[start_idx+len(tag)+2:end_idx])
-                            start_idx = test_str.find("<" + tag + ">", end_idx)
-                    desc__ = ans['desc'].split(" ")
-                    final_desc = "<p>"
-                    for word in desc__:
-                        if(re.sub('[^A-Za-z0-9]+', '', word) in res__):
-                            final_desc +=  "<span style='color:#e28743;font-weight:bold'>"+word+"</span> "
-                        else:
-                            final_desc += word + " "
-                    final_desc += "</p>"
-                    st.markdown(final_desc,unsafe_allow_html = True)
-                else:
-                    st.write(ans['desc'])
-                if("sparse" in ans):
-                    with st.expander("Expanded document:"):
-                        sparse_ = dict(sorted(ans['sparse'].items(), key=lambda item: item[1],reverse=True))
-                        filtered_sparse = dict()
-                        for key in sparse_:
-                            if(sparse_[key]>=1.0):
-                                filtered_sparse[key] = round(sparse_[key], 2)
-                        st.write(filtered_sparse)
-                with st.expander("Document Metadata:",expanded = False):
-                    st.write(":green[default:]")
-                    st.json({"category:":ans['category'],"price":str(ans['price']),"gender_affinity":ans['gender_affinity'],"style":ans['style']},expanded = True)
-                    if("rekog" in ans):
-                        st.write(":green[enriched:]")
-                        st.json(ans['rekog'],expanded = True)
-            with inner_col_1:
-                if(st.session_state.input_evaluate == "enabled"):
-                    with st.container(border = False):
-                        if("relevant" in ans.keys()):
-                            if(ans['relevant']==True):
-                                st.write(":white_check_mark:")
-                            else:
-                                st.write(":x:")
-        i = i+1
     with col_3:
         if(index == len(st.session_state.questions)):

     col_1, col_2,col_3 = st.columns([70,10,20])
     i = 0
     filter_out = 0
+    if len(answer) == 0:
+        st.write("No results found")
+    else:
+        for ans in answer:
+            if('b5/b5319e00' in ans['image_url'] ):
+                filter_out+=1
+                continue
+            format_ = ans['image_url'].split(".")[-1]
+            Image.MAX_IMAGE_PIXELS = 100000000
+            width = 500
+            height = 500
+            with col_1:
+                inner_col_1,inner_col_2 = st.columns([8,92])
+                with inner_col_2:
+                    st.image(ans['image_url'].replace("/home/ec2-user/SageMaker/","/home/user/"))
+                    if("highlight" in ans and 'Keyword Search' in st.session_state.input_searchType):
+                        test_strs = ans["highlight"]
+                        tag = "em"
+                        res__ = []
+                        for test_str in test_strs:
+                            start_idx = test_str.find("<" + tag + ">")
+                            while start_idx != -1:
+                                end_idx = test_str.find("</" + tag + ">", start_idx)
+                                if end_idx == -1:
+                                    break
+                                res__.append(test_str[start_idx+len(tag)+2:end_idx])
+                                start_idx = test_str.find("<" + tag + ">", end_idx)
+                        desc__ = ans['desc'].split(" ")
+                        final_desc = "<p>"
+                        for word in desc__:
+                            if(re.sub('[^A-Za-z0-9]+', '', word) in res__):
+                                final_desc +=  "<span style='color:#e28743;font-weight:bold'>"+word+"</span> "
+                            else:
+                                final_desc += word + " "
+                        final_desc += "</p>"
+                        st.markdown(final_desc,unsafe_allow_html = True)
+                    else:
+                        st.write(ans['desc'])
+                    if("sparse" in ans):
+                        with st.expander("Expanded document:"):
+                            sparse_ = dict(sorted(ans['sparse'].items(), key=lambda item: item[1],reverse=True))
+                            filtered_sparse = dict()
+                            for key in sparse_:
+                                if(sparse_[key]>=1.0):
+                                    filtered_sparse[key] = round(sparse_[key], 2)
+                            st.write(filtered_sparse)
+                    with st.expander("Document Metadata:",expanded = False):
+                        st.write(":green[default:]")
+                        st.json({"category:":ans['category'],"price":str(ans['price']),"gender_affinity":ans['gender_affinity'],"style":ans['style']},expanded = True)
+                        if("rekog" in ans):
+                            st.write(":green[enriched:]")
+                            st.json(ans['rekog'],expanded = True)
+                with inner_col_1:
+                    if(st.session_state.input_evaluate == "enabled"):
+                        with st.container(border = False):
+                            if("relevant" in ans.keys()):
+                                if(ans['relevant']==True):
+                                    st.write(":white_check_mark:")
+                                else:
+                                    st.write(":x:")
+            i = i+1
     with col_3:
         if(index == len(st.session_state.questions)):

semantic_search/query_rewrite.py CHANGED Viewed

@@ -252,16 +252,6 @@ def get_new_query_res(query):
         query = st.session_state.input_rekog_label
     if(st.session_state.input_is_rewrite_query == 'enabled'):
-        # query_struct = query_constructor.invoke(
-        #     {
-        #         "query": query
-        #     }
-        # )
-        # print("***prompt****")
-        # print(prompt)
-        # print("******query_struct******")
-        # print(query_struct)
         res = invoke_models.invoke_llm_model( prompt_.format(query=query,schema = schema)  ,False)
         inter_query = res[7:-3].replace('\\"',"'").replace("\n","")
         print("inter_query")
@@ -294,43 +284,8 @@ def get_new_query_res(query):
                         draft_new_query['bool']['must'].append(q_dash)
                     else:
                         draft_new_query['bool']['should'].append(q_dash)
-            # if('should' in result_query_llm['bool']):
-            #     for q_ in result_query_llm['bool']['must']:
-            #         q__dash = json.loads(json.dumps(q_).replace('term','match'  ))
-            #         clause = list(q__dash.keys())[0]category
-            #         long_field = list(q__dash[clause].keys())[0]
-            #         get_attr = long_field.split(".")[1]
-            #         q__dash[clause][get_attr] = q__dash[clause][long_field]
-            #         draft_new_query['bool']['should'].append(q__dash)
-        #print(draft_new_query)
-        query_ = draft_new_query#json.loads(json.dumps(opts.visit_structured_query(query_struct)[1]['filter']).replace("must","should"))#.replace("must","should")
-        # if('bool' in query_ and 'should' in query_['bool']):
-        #     query_['bool']['should'].append({
-        #             "match": {
-        #                 "rekog_description_plus_original_description": query
-        #             }
-        #         })
-        # else:
-        #     query_['bool']['should'] = {
-        #             "match": {
-        #                 "rekog_description_plus_original_description": query
-        #             }
-        #         }
-        # def find_by_key(data, target):
-        #     for key, value in data.items():
-        #         if isinstance(value, dict):
-        #             yield from find_by_key(value, target)
-        #         elif key == target:
-        #             yield value
-        # for x in find_by_key(query_, "metadata.category.keyword"):
-        #     imp_item = x
         ###### find the main subject of the query
@@ -405,46 +360,7 @@ def get_new_query_res(query):
         st.session_state.input_rewritten_query = {"query":query_}
         print(st.session_state.input_rewritten_query)
-    # if(st.session_state.input_rekog_label!="" and query!=st.session_state.input_rekog_label):
-    #     amazon_rekognition.call(st.session_state.input_text,st.session_state.input_rekog_label)
-    # #return searchWithNewQuery(st.session_state.input_rewritten_query)
-# def searchWithNewQuery(new_query):
-#     response = aos_client.search(
-#         body = new_query,
-#         index = "demo-retail-rekognition"#'self-query-rewrite-retail',
-#         #pipeline = 'RAG-Search-Pipeline'
-#     )
-#     hits = response['hits']['hits']
-#     print("rewrite-------------------------")
-#     arr = []
-#     for doc in hits:
-#         # if('b5/b5319e00' in doc['_source']['image_s3_url'] ):
-#         #     filter_out +=1
-#         #     continue
-#         res_ = {"desc":doc['_source']['text'],"image_url":doc['_source']['metadata']['image_s3_url']}
-#         if('highlight' in doc):
-#             res_['highlight'] = doc['highlight']['text']
-#         # if('caption_embedding' in doc['_source']):
-#         #     res_['sparse'] = doc['_source']['caption_embedding']
-#         # if('query_sparse' in response_ and len(arr) ==0 ):
-#         #     res_['query_sparse'] = response_["query_sparse"]
-#         res_['id'] = doc['_id']
-#         res_['score'] = doc['_score']
-#         res_['title'] = doc['_source']['text']
-#         arr.append(res_)
-#     return arr

         query = st.session_state.input_rekog_label
     if(st.session_state.input_is_rewrite_query == 'enabled'):
         res = invoke_models.invoke_llm_model( prompt_.format(query=query,schema = schema)  ,False)
         inter_query = res[7:-3].replace('\\"',"'").replace("\n","")
         print("inter_query")
                         draft_new_query['bool']['must'].append(q_dash)
                     else:
                         draft_new_query['bool']['should'].append(q_dash)
+        query_ = draft_new_query
         ###### find the main subject of the query
         st.session_state.input_rewritten_query = {"query":query_}
         print(st.session_state.input_rewritten_query)