prasadnu committed on
Commit
6f4ec47
·
1 Parent(s): d7b594f

search pipeline updated

Browse files
RAG/rag_DocumentSearcher.py CHANGED
@@ -12,7 +12,7 @@ import re
12
  import torch
13
  import base64
14
  import requests
15
- import utilities.re_ranker as re_ranker
16
  import utilities.invoke_models as invoke_models
17
  #import langchain
18
  headers = {"Content-Type": "application/json"}
 
12
  import torch
13
  import base64
14
  import requests
15
+ #import utilities.re_ranker as re_ranker
16
  import utilities.invoke_models as invoke_models
17
  #import langchain
18
  headers = {"Content-Type": "application/json"}
pages/Semantic_Search.py CHANGED
@@ -24,7 +24,7 @@ import base64
24
  import shutil
25
  import re
26
  from requests.auth import HTTPBasicAuth
27
- import utilities.re_ranker as re_ranker
28
  # from nltk.stem import PorterStemmer
29
  # from nltk.tokenize import word_tokenize
30
  import query_rewrite
@@ -585,11 +585,6 @@ def handle_input():
585
  })
586
 
587
  st.session_state.answers_none_rank = st.session_state.answers
588
- if(st.session_state.input_reranker == "None"):
589
- st.session_state.answers = st.session_state.answers_none_rank
590
- else:
591
- if(st.session_state.input_reranker == 'Kendra Rescore'):
592
- st.session_state.answers = re_ranker.re_rank("search",st.session_state.input_reranker,st.session_state.input_searchType,st.session_state.questions, st.session_state.answers)
593
  if(st.session_state.input_evaluate == "enabled"):
594
  llm_eval.eval(st.session_state.questions, st.session_state.answers)
595
  #st.session_state.input_text=""
 
24
  import shutil
25
  import re
26
  from requests.auth import HTTPBasicAuth
27
+ #import utilities.re_ranker as re_ranker
28
  # from nltk.stem import PorterStemmer
29
  # from nltk.tokenize import word_tokenize
30
  import query_rewrite
 
585
  })
586
 
587
  st.session_state.answers_none_rank = st.session_state.answers
 
 
 
 
 
588
  if(st.session_state.input_evaluate == "enabled"):
589
  llm_eval.eval(st.session_state.questions, st.session_state.answers)
590
  #st.session_state.input_text=""
semantic_search/all_search_execute.py CHANGED
@@ -408,23 +408,12 @@ def handler(input_,session_id):
408
  hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
409
 
410
 
411
-
412
-
413
-
414
-
415
-
416
-
417
- print("hybrid_payload")
418
- print(st.session_state.re_ranker)
419
- print("---------------")
420
  docs = []
421
 
422
  if(st.session_state.input_sql_query!=""):
423
  url = host +"_plugins/_sql?format=json"
424
  payload = {"query":st.session_state.input_sql_query}
425
  r = requests.post(url, auth=awsauth, json=payload, headers=headers)
426
- print("^^^^^")
427
- print(r.text)
428
 
429
  if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
430
  single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
@@ -439,24 +428,14 @@ def handler(input_,session_id):
439
  }
440
  }}
441
 
442
- print(hybrid_payload)
443
- print(url)
444
  r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
445
- print(r.status_code)
446
- print(r.text)
447
  response_ = json.loads(r.text)
448
- print("-------------------------------------------------------------------")
449
- #print(response_)
450
  docs = response_['hits']['hits']
451
 
452
 
453
  else:
454
 
455
 
456
- print("hybrid_payload")
457
- print(hybrid_payload)
458
- print("-------------------------------------------------------------------")
459
-
460
  if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
461
  url_ = url + "?search_pipeline=hybrid_search_pipeline"
462
 
@@ -469,12 +448,8 @@ def handler(input_,session_id):
469
  "query_text": query
470
  }
471
  }}
472
- print(url_)
473
  r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
474
- print(r.status_code)
475
  response_ = json.loads(r.text)
476
- print("-------------------------------------------------------------------")
477
- print(response_)
478
  docs = response_['hits']['hits']
479
 
480
  else:
 
408
  hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
409
 
410
 
 
 
 
 
 
 
 
 
 
411
  docs = []
412
 
413
  if(st.session_state.input_sql_query!=""):
414
  url = host +"_plugins/_sql?format=json"
415
  payload = {"query":st.session_state.input_sql_query}
416
  r = requests.post(url, auth=awsauth, json=payload, headers=headers)
 
 
417
 
418
  if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
419
  single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
 
428
  }
429
  }}
430
 
 
 
431
  r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
 
 
432
  response_ = json.loads(r.text)
 
 
433
  docs = response_['hits']['hits']
434
 
435
 
436
  else:
437
 
438
 
 
 
 
 
439
  if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
440
  url_ = url + "?search_pipeline=hybrid_search_pipeline"
441
 
 
448
  "query_text": query
449
  }
450
  }}
 
451
  r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
 
452
  response_ = json.loads(r.text)
 
 
453
  docs = response_['hits']['hits']
454
 
455
  else:
utilities/re_ranker.py CHANGED
@@ -5,7 +5,7 @@ import time
5
  import streamlit as st
6
  from sentence_transformers import CrossEncoder
7
 
8
- model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
9
  ####### Add this Kendra Rescore ranking
10
  #kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
11
  #print("Create a rescore execution plan.")
@@ -48,11 +48,7 @@ model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
48
 
49
 
50
  def re_rank(self_, rerank_type, search_type, question, answers):
51
-
52
- print("start")
53
- print()
54
-
55
-
56
  ans = []
57
  ids = []
58
  ques_ans = []
@@ -94,34 +90,34 @@ def re_rank(self_, rerank_type, search_type, question, answers):
94
  # re_ranked[0]['id'] = len(question)
95
  # return re_ranked
96
 
97
- if(rerank_type == 'Cross Encoder'):
98
 
99
- scores = model.predict(
100
- ques_ans
101
- )
102
 
103
- index__ = 0
104
- for i in ans:
105
- i['new_score'] = scores[index__]
106
- index__ = index__+1
107
 
108
- ans_sorted = sorted(ans, key=lambda d: d['new_score'],reverse=True)
109
 
110
 
111
- def retreive_only_text(item):
112
- return item['text']
113
 
114
- if(self_ == 'rag'):
115
- return list(map(retreive_only_text, ans_sorted))
116
 
117
 
118
- re_ranked[0]['answer']=[]
119
- for j in ans_sorted:
120
- pos_ = ids.index(j['Id'])
121
- re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
122
- re_ranked[0]['search_type']= search_type,
123
- re_ranked[0]['id'] = len(question)
124
- return re_ranked
125
 
126
 
127
 
 
5
  import streamlit as st
6
  from sentence_transformers import CrossEncoder
7
 
8
+ #model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
9
  ####### Add this Kendra Rescore ranking
10
  #kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
11
  #print("Create a rescore execution plan.")
 
48
 
49
 
50
  def re_rank(self_, rerank_type, search_type, question, answers):
51
+
 
 
 
 
52
  ans = []
53
  ids = []
54
  ques_ans = []
 
90
  # re_ranked[0]['id'] = len(question)
91
  # return re_ranked
92
 
93
+ # if(rerank_type == 'Cross Encoder'):
94
 
95
+ # scores = model.predict(
96
+ # ques_ans
97
+ # )
98
 
99
+ # index__ = 0
100
+ # for i in ans:
101
+ # i['new_score'] = scores[index__]
102
+ # index__ = index__+1
103
 
104
+ # ans_sorted = sorted(ans, key=lambda d: d['new_score'],reverse=True)
105
 
106
 
107
+ # def retreive_only_text(item):
108
+ # return item['text']
109
 
110
+ # if(self_ == 'rag'):
111
+ # return list(map(retreive_only_text, ans_sorted))
112
 
113
 
114
+ # re_ranked[0]['answer']=[]
115
+ # for j in ans_sorted:
116
+ # pos_ = ids.index(j['Id'])
117
+ # re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
118
+ # re_ranked[0]['search_type']= search_type,
119
+ # re_ranked[0]['id'] = len(question)
120
+ # return re_ranked
121
 
122
 
123