OpenSearch-AI

Running on T4

App Files Files Community

OpenSearch-AI / utilities /re_ranker.py

prasadnu

RAG fix

2e2dda5 4 months ago

raw

history blame

4.13 kB

	import boto3
	from botocore.exceptions import ClientError
	import pprint
	import time
	import streamlit as st
	from sentence_transformers import CrossEncoder

	# Cross-encoder used by the 'Cross Encoder' path of re_rank. It is constructed
	# once, as a side effect of importing this module.
	model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
	# boto3 client for the "kendra-ranking" service, used by the 'Kendra Rescore'
	# path of re_rank. The region is pinned to us-east-1.
	kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')


	# NOTE(review): this message is printed on every import even though the
	# plan-creation code further down is commented out.
	print("Create a rescore execution plan.")

	# Provide a name for the rescore execution plan
	name = "MyRescoreExecutionPlan"
	# Set your required additional capacity units
	# Don't set capacity units if you don't require more than 1 unit given by default
	capacity_units = 2

	# One-time setup walkthrough, kept commented out for reference: it creates a
	# rescore execution plan and polls until its status is no longer "CREATING".
	# In the code visible here, `name` and `capacity_units` above are referenced
	# only by this commented-out section; re_rank passes a hard-coded plan id.
	#
	# try:
	#     rescore_execution_plan_response = kendra_ranking.create_rescore_execution_plan(
	#         Name = name,
	#         CapacityUnits = {"RescoreCapacityUnits":capacity_units}
	#     )

	#     pprint.pprint(rescore_execution_plan_response)

	#     rescore_execution_plan_id = rescore_execution_plan_response["Id"]

	#     print("Wait for Amazon Kendra to create the rescore execution plan.")

	#     while True:
	#         # Get the details of the rescore execution plan, such as the status
	#         rescore_execution_plan_description = kendra_ranking.describe_rescore_execution_plan(
	#             Id = rescore_execution_plan_id
	#         )
	#         # When status is not CREATING quit.
	#         status = rescore_execution_plan_description["Status"]
	#         print(" Creating rescore execution plan. Status: "+status)
	#         time.sleep(60)
	#         if status != "CREATING":
	#             break

	# except ClientError as e:
	#     print("%s" % e)

	# print("Program ends.")



	def _wrap_re_ranked(ordered_answers, search_type, question):
	    """Package re-ordered answers in the one-element list shape re_rank returns."""
	    return [{
	        'answer': ordered_answers,
	        # Stored as the plain value; a stray trailing comma used to wrap it
	        # in a 1-tuple.
	        'search_type': search_type,
	        'id': len(question),
	    }]


	def re_rank(self_, rerank_type, search_type, question, answers):
	    """Re-order the retrieved answers for a query using the chosen re-ranker.

	    Args:
	        self_: Caller mode. With "search", every answer must be a dict with
	            'id', 'desc' and 'score' keys. With any other value each answer
	            is used as-is as the passage text. "rag" additionally selects
	            the flat-list return shape of the 'Cross Encoder' path.
	        rerank_type: 'Kendra Rescore' or 'Cross Encoder'. Any other value
	            performs no re-ranking and returns None.
	        search_type: Value copied into the 'search_type' field of the result.
	        question: Sequence whose first element is a dict with a 'question'
	            key (the query text). Its length becomes the result's 'id'.
	        answers: Sequence whose first element is a dict with an 'answer' key
	            holding the candidate answers.

	    Returns:
	        * 'Cross Encoder' with self_ == 'rag': the candidate answers as a
	          flat list, highest score first.
	        * Otherwise, for a supported rerank_type: a one-element list
	          ``[{'answer': [...], 'search_type': search_type,
	          'id': len(question)}]`` where 'answer' holds the original answer
	          objects in their new order.
	        * None when rerank_type is not one of the two supported values.

	    Note:
	        In non-"search" mode the documents handed to Kendra are
	        ``{'text': ...}`` dicts without ids, so 'Kendra Rescore' is
	        presumably only usable with self_ == "search" -- TODO confirm.
	    """
	    print("start")
	    print()

	    query = question[0]['question']
	    # Materialise once so the candidates can be indexed by position below.
	    candidates = list(answers[0]['answer'])
	    is_search = self_ == "search"

	    documents = []  # payload for Kendra rescore
	    ids = []        # ids of the search hits, parallel to `candidates`
	    pairs = []      # (query, passage) inputs for the cross-encoder
	    for item in candidates:
	        if is_search:
	            documents.append({
	                "Id": item['id'],
	                "Body": item["desc"],
	                "OriginalScore": item['score'],
	                "Title": item["desc"],
	            })
	            ids.append(item['id'])
	            pairs.append((query, item["desc"]))
	        else:
	            documents.append({'text': item})
	            pairs.append((query, item))

	    if rerank_type == 'Kendra Rescore':
	        if not documents:
	            # Nothing to rank: skip the remote call instead of sending an
	            # empty batch.
	            return _wrap_re_ranked([], search_type, question)

	        rescore_response = kendra_ranking.rescore(
	            RescoreExecutionPlanId='b2a4d4f3-98ff-4e17-8b69-4c61ed7d91eb',
	            SearchQuery=query,
	            Documents=documents,
	        )
	        # Result items look like {'DocumentId': 'DocId1', 'Score': 2.0}; the
	        # original answers are emitted in the order Kendra returns them.
	        ordered = [
	            candidates[ids.index(result['DocumentId'])]
	            for result in rescore_response["ResultItems"]
	        ]
	        return _wrap_re_ranked(ordered, search_type, question)

	    if rerank_type == 'Cross Encoder':
	        if not pairs:
	            # Nothing to score: do not call the model with an empty batch.
	            if self_ == 'rag':
	                return []
	            return _wrap_re_ranked([], search_type, question)

	        scores = model.predict(pairs)

	        print("scores")
	        print(scores)

	        # Sort candidate positions rather than ids: sorted() is stable, so
	        # ties keep their original order, and answers that share an id are
	        # no longer collapsed onto the first occurrence.
	        order = sorted(
	            range(len(pairs)), key=lambda pos: scores[pos], reverse=True
	        )
	        ordered = [candidates[pos] for pos in order]

	        if self_ == 'rag':
	            return ordered
	        return _wrap_re_ranked(ordered, search_type, question)

	    # Unsupported rerank_type (e.g. 'None'): nothing is re-ranked.
	    return None