Daxtra
/

Phi-3.5-mini-Conversational-Boolean-Parser-v1-AWQ

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

Phi-3.5-mini-Conversational-Boolean-Parser-v1-AWQ / handler.py

Daxtra's picture

Create handler.py

5a00116 verified 11 days ago

3.88 kB

	from vllm import LLM, SamplingParams
	from transformers import AutoTokenizer

	max_seq_length = 2048
	dtype = None
	load_in_4bit = True

	instruction_message = """You are a boolean logic parser for natural language queries in a conversational search recruitment system. Your task is to insert XML tags that capture boolean operators (AND, OR, NOT) without altering the original query's text, spelling, grammar, word order, or formatting. The goal is to tag components that correspond to boolean logic while preserving the original sentence structure.
	The rules you must follow are:
	1. Use the <AND> tag for items that are of the same type, such as skills, certifications, or qualifications, when they are listed together.
	2. Use the <OR> tag for items of the same type when they are expressed as alternatives.
	3. Use the <NOT> tag for negations, and apply it to the entire negated portion. Nested boolean operators inside <NOT> are unnecessary.
	4. Avoid tagging unrelated components (e.g., degree and location) together even if they are connected by coordinating conjunctions.
	5. Do not nest boolean operators. Each operator (AND, OR, NOT) should be handled separately and should not be placed within another operator's tag. Nested logic is not required.
	When the tags are removed, the query must remain exactly as it was originally written. Your role is to accurately capture the underlying boolean logic, not to rewrite or rephrase the query.
	"""

	conversation_history = [
	{
	"role": "system",
	"content": f"{instruction_message}"
	},
	{
	"role": "user",
	"content": "Seeking a Cyclist or Supply Technician with business collaboration and product revenue, with no experience at Google."
	},
	{
	"role": "assistant",
	"content": "Seeking a <OR>Cyclist or Supply Technician</OR> with <AND>business collaboration and product revenue</AND>, with <NOT>no experience at Google.</NOT>"
	},
	]

	class EndpointHandler:

	def __init__(self, path=""):
	"""
	Initializes the EndpointHandler with a specified model and tokenizer.

	Args:
	path (str): The local path or identifier for the model to load.
	This path should contain both the model and tokenizer files.
	"""
	self.llm = LLM(model=path, max_model_len=2048, quantization='awq', gpu_memory_utilization=0.8)
	self.tokenizer = AutoTokenizer.from_pretrained(path)


	def __call__(self, data) -> str:
	"""
	Processes the input data by generating a formatted conversation history string
	and passing it to the language model for generation.

	Args:
	data (dict): A dictionary containing the input data with the key `inputs`,
	which is a list representing a conversation history.
	Each conversation history item should be a dictionary with 'role'
	(e.g., "assistant" or "user") and 'content' (the message text).

	Returns:
	str: The generated output from the model after processing the conversation history.
	"""

	# Get inputs and preprocess
	user_string = data.pop("user_string")
	user_example = {"role": "user", "content": user_string}

	conversation_input = conversation_history.copy()
	conversation_input.append(user_example)
	model_input = self.tokenizer.apply_chat_template(conversation_input, tokenize=False, add_generation_prompt=True)

	# Set sampling parameters
	sampling_params = SamplingParams(temperature=0.1, min_p=0.6, max_tokens=1024)

	# Generate output
	output = self.llm.generate(model_input, sampling_params)
	generated_text = output[0].outputs[0].text.replace("<\|end\|>", "").strip()

	return generated_text