search pipeline updated
Files changed:
- RAG/colpali.py (+2, -22)
- RAG/rag_DocumentSearcher.py (+17, -2)
RAG/colpali.py
CHANGED
@@ -206,39 +206,23 @@ def colpali_search_rerank(query):
         add_score = 0
 
         for index,i in enumerate(query_token_vectors):
-            #token = vocab_dict[str(token_ids[index])]
-            #if(token!='[SEP]' and token!='[CLS]'):
             query_token_vector = np.array(i)
-            #print("query token: "+token)
-            #print("-----------------")
             scores = []
             for m in with_s:
-                #m_arr = m.split("-")
-                #if(m_arr[-1]!='[SEP]' and m_arr[-1]!='[CLS]'):
-                #print("document token: "+m_arr[3])
                 doc_token_vector = np.array(m['page_sub_vector'])
                 score = np.dot(query_token_vector,doc_token_vector)
                 scores.append(score)
-
-
+
             scores.sort(reverse=True)
             max_score = scores[0]
             add_score+=max_score
-        #max_score_dict_list.append(newlist[0])
-        #print(newlist[0])
-        #max_score_dict_list_sorted = sorted(max_score_dict_list, key=lambda d: d['score'], reverse=True)
-        #print(max_score_dict_list_sorted)
-        # print(add_score)
         doc["total_score"] = add_score
-        #doc['max_score_dict_list_sorted'] = max_score_dict_list_sorted
         final_docs.append(doc)
     final_docs_sorted = sorted(final_docs, key=lambda d: d['total_score'], reverse=True)
     final_docs_sorted_20.append(final_docs_sorted[:20])
     img = "/home/user/app/vs/"+final_docs_sorted_20[0][0]['image']
     ans = generate_ans(img,query)
     images_highlighted = [{'file':img}]
-    # if(st.session_state.show_columns == True):
-    #     images_highlighted = img_highlight(img,query_token_vectors,result['query_tokens'])
     st.session_state.top_img = img
     st.session_state.query_token_vectors = query_token_vectors
     st.session_state.query_tokens = result['query_tokens']

@@ -312,12 +296,8 @@ def img_highlight(img,batch_queries,query_tokens):
     # # Get the similarity map for our (only) input image
     similarity_maps = batched_similarity_maps[0]  # (query_length, n_patches_x, n_patches_y)
 
-    print(f"Similarity map shape: (query_length, n_patches_x, n_patches_y) = {tuple(similarity_maps.shape)}")
-    print(query_tokens)
     query_tokens_from_model = query_tokens[0]['tokens']
-
-    print(type(query_tokens_from_model))
-
+
     plots = plot_all_similarity_maps(
         image=image,
         query_tokens=query_tokens_from_model,
RAG/rag_DocumentSearcher.py
CHANGED
@@ -189,23 +189,38 @@ def query_(awsauth,inputs, session_id,search_types):
     # query_sparse = sparse_["inference_results"][0]["output"][0]["dataAsMap"]["response"][0]
 
     hits = []
-    if(num_queries>1
+    if(num_queries>1):
         s_pipeline_url = host + s_pipeline_path
         r = requests.put(s_pipeline_url, auth=awsauth, json=s_pipeline_payload, headers=headers)
         path = st.session_state.input_index+"/_search?search_pipeline=rag-search-pipeline"
     else:
-        path = st.session_state.input_index+"/_search"
+        if(input_is_rerank):
+            path = st.session_state.input_index+"/_search?search_pipeline=rerank_pipeline_rag"
+        else:
+            path = st.session_state.input_index+"/_search"
     url = host+path
     if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
         single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
         del hybrid_payload["query"]["hybrid"]
         hybrid_payload["query"] = single_query
+        if(st.session_state.input_is_rerank):
+            hybrid_payload["ext"] = {"rerank": {
+                "query_context": {
+                    "query_text": question
+                }
+            }}
         r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
         response_ = json.loads(r.text)
         print(response_)
         hits = response_['hits']['hits']
 
     else:
+        if(st.session_state.input_is_rerank):
+            hybrid_payload["ext"] = {"rerank": {
+                "query_context": {
+                    "query_text": question
+                }
+            }}
        r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
        response_ = json.loads(r.text)
        hits = response_['hits']['hits']