Spaces:
Running
on
T4
Running
on
T4
search pipeline updated
Browse files
- RAG/rag_DocumentSearcher.py +1 -1
- pages/Semantic_Search.py +1 -6
- semantic_search/all_search_execute.py +0 -25
- utilities/re_ranker.py +22 -26
RAG/rag_DocumentSearcher.py
CHANGED
@@ -12,7 +12,7 @@ import re
|
|
12 |
import torch
|
13 |
import base64
|
14 |
import requests
|
15 |
-
import utilities.re_ranker as re_ranker
|
16 |
import utilities.invoke_models as invoke_models
|
17 |
#import langchain
|
18 |
headers = {"Content-Type": "application/json"}
|
|
|
12 |
import torch
|
13 |
import base64
|
14 |
import requests
|
15 |
+
#import utilities.re_ranker as re_ranker
|
16 |
import utilities.invoke_models as invoke_models
|
17 |
#import langchain
|
18 |
headers = {"Content-Type": "application/json"}
|
pages/Semantic_Search.py
CHANGED
@@ -24,7 +24,7 @@ import base64
|
|
24 |
import shutil
|
25 |
import re
|
26 |
from requests.auth import HTTPBasicAuth
|
27 |
-
import utilities.re_ranker as re_ranker
|
28 |
# from nltk.stem import PorterStemmer
|
29 |
# from nltk.tokenize import word_tokenize
|
30 |
import query_rewrite
|
@@ -585,11 +585,6 @@ def handle_input():
|
|
585 |
})
|
586 |
|
587 |
st.session_state.answers_none_rank = st.session_state.answers
|
588 |
-
if(st.session_state.input_reranker == "None"):
|
589 |
-
st.session_state.answers = st.session_state.answers_none_rank
|
590 |
-
else:
|
591 |
-
if(st.session_state.input_reranker == 'Kendra Rescore'):
|
592 |
-
st.session_state.answers = re_ranker.re_rank("search",st.session_state.input_reranker,st.session_state.input_searchType,st.session_state.questions, st.session_state.answers)
|
593 |
if(st.session_state.input_evaluate == "enabled"):
|
594 |
llm_eval.eval(st.session_state.questions, st.session_state.answers)
|
595 |
#st.session_state.input_text=""
|
|
|
24 |
import shutil
|
25 |
import re
|
26 |
from requests.auth import HTTPBasicAuth
|
27 |
+
#import utilities.re_ranker as re_ranker
|
28 |
# from nltk.stem import PorterStemmer
|
29 |
# from nltk.tokenize import word_tokenize
|
30 |
import query_rewrite
|
|
|
585 |
})
|
586 |
|
587 |
st.session_state.answers_none_rank = st.session_state.answers
|
|
|
|
|
|
|
|
|
|
|
588 |
if(st.session_state.input_evaluate == "enabled"):
|
589 |
llm_eval.eval(st.session_state.questions, st.session_state.answers)
|
590 |
#st.session_state.input_text=""
|
semantic_search/all_search_execute.py
CHANGED
@@ -408,23 +408,12 @@ def handler(input_,session_id):
|
|
408 |
hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
|
409 |
|
410 |
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
print("hybrid_payload")
|
418 |
-
print(st.session_state.re_ranker)
|
419 |
-
print("---------------")
|
420 |
docs = []
|
421 |
|
422 |
if(st.session_state.input_sql_query!=""):
|
423 |
url = host +"_plugins/_sql?format=json"
|
424 |
payload = {"query":st.session_state.input_sql_query}
|
425 |
r = requests.post(url, auth=awsauth, json=payload, headers=headers)
|
426 |
-
print("^^^^^")
|
427 |
-
print(r.text)
|
428 |
|
429 |
if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
|
430 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
@@ -439,24 +428,14 @@ def handler(input_,session_id):
|
|
439 |
}
|
440 |
}}
|
441 |
|
442 |
-
print(hybrid_payload)
|
443 |
-
print(url)
|
444 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
445 |
-
print(r.status_code)
|
446 |
-
print(r.text)
|
447 |
response_ = json.loads(r.text)
|
448 |
-
print("-------------------------------------------------------------------")
|
449 |
-
#print(response_)
|
450 |
docs = response_['hits']['hits']
|
451 |
|
452 |
|
453 |
else:
|
454 |
|
455 |
|
456 |
-
print("hybrid_payload")
|
457 |
-
print(hybrid_payload)
|
458 |
-
print("-------------------------------------------------------------------")
|
459 |
-
|
460 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
461 |
url_ = url + "?search_pipeline=hybrid_search_pipeline"
|
462 |
|
@@ -469,12 +448,8 @@ def handler(input_,session_id):
|
|
469 |
"query_text": query
|
470 |
}
|
471 |
}}
|
472 |
-
print(url_)
|
473 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
474 |
-
print(r.status_code)
|
475 |
response_ = json.loads(r.text)
|
476 |
-
print("-------------------------------------------------------------------")
|
477 |
-
print(response_)
|
478 |
docs = response_['hits']['hits']
|
479 |
|
480 |
else:
|
|
|
408 |
hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
|
409 |
|
410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
docs = []
|
412 |
|
413 |
if(st.session_state.input_sql_query!=""):
|
414 |
url = host +"_plugins/_sql?format=json"
|
415 |
payload = {"query":st.session_state.input_sql_query}
|
416 |
r = requests.post(url, auth=awsauth, json=payload, headers=headers)
|
|
|
|
|
417 |
|
418 |
if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
|
419 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
|
|
428 |
}
|
429 |
}}
|
430 |
|
|
|
|
|
431 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
|
|
|
|
432 |
response_ = json.loads(r.text)
|
|
|
|
|
433 |
docs = response_['hits']['hits']
|
434 |
|
435 |
|
436 |
else:
|
437 |
|
438 |
|
|
|
|
|
|
|
|
|
439 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
440 |
url_ = url + "?search_pipeline=hybrid_search_pipeline"
|
441 |
|
|
|
448 |
"query_text": query
|
449 |
}
|
450 |
}}
|
|
|
451 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
|
|
452 |
response_ = json.loads(r.text)
|
|
|
|
|
453 |
docs = response_['hits']['hits']
|
454 |
|
455 |
else:
|
utilities/re_ranker.py
CHANGED
@@ -5,7 +5,7 @@ import time
|
|
5 |
import streamlit as st
|
6 |
from sentence_transformers import CrossEncoder
|
7 |
|
8 |
-
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
9 |
####### Add this Kendra Rescore ranking
|
10 |
#kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
|
11 |
#print("Create a rescore execution plan.")
|
@@ -48,11 +48,7 @@ model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
|
48 |
|
49 |
|
50 |
def re_rank(self_, rerank_type, search_type, question, answers):
|
51 |
-
|
52 |
-
print("start")
|
53 |
-
print()
|
54 |
-
|
55 |
-
|
56 |
ans = []
|
57 |
ids = []
|
58 |
ques_ans = []
|
@@ -94,34 +90,34 @@ def re_rank(self_, rerank_type, search_type, question, answers):
|
|
94 |
# re_ranked[0]['id'] = len(question)
|
95 |
# return re_ranked
|
96 |
|
97 |
-
if(rerank_type == 'Cross Encoder'):
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
|
108 |
-
|
109 |
|
110 |
|
111 |
-
|
112 |
-
|
113 |
|
114 |
-
|
115 |
-
|
116 |
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
|
127 |
|
|
|
5 |
import streamlit as st
|
6 |
from sentence_transformers import CrossEncoder
|
7 |
|
8 |
+
#model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
9 |
####### Add this Kendra Rescore ranking
|
10 |
#kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
|
11 |
#print("Create a rescore execution plan.")
|
|
|
48 |
|
49 |
|
50 |
def re_rank(self_, rerank_type, search_type, question, answers):
|
51 |
+
|
|
|
|
|
|
|
|
|
52 |
ans = []
|
53 |
ids = []
|
54 |
ques_ans = []
|
|
|
90 |
# re_ranked[0]['id'] = len(question)
|
91 |
# return re_ranked
|
92 |
|
93 |
+
# if(rerank_type == 'Cross Encoder'):
|
94 |
|
95 |
+
# scores = model.predict(
|
96 |
+
# ques_ans
|
97 |
+
# )
|
98 |
|
99 |
+
# index__ = 0
|
100 |
+
# for i in ans:
|
101 |
+
# i['new_score'] = scores[index__]
|
102 |
+
# index__ = index__+1
|
103 |
|
104 |
+
# ans_sorted = sorted(ans, key=lambda d: d['new_score'],reverse=True)
|
105 |
|
106 |
|
107 |
+
# def retreive_only_text(item):
|
108 |
+
# return item['text']
|
109 |
|
110 |
+
# if(self_ == 'rag'):
|
111 |
+
# return list(map(retreive_only_text, ans_sorted))
|
112 |
|
113 |
|
114 |
+
# re_ranked[0]['answer']=[]
|
115 |
+
# for j in ans_sorted:
|
116 |
+
# pos_ = ids.index(j['Id'])
|
117 |
+
# re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
|
118 |
+
# re_ranked[0]['search_type']= search_type,
|
119 |
+
# re_ranked[0]['id'] = len(question)
|
120 |
+
# return re_ranked
|
121 |
|
122 |
|
123 |
|