from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

# Configuration constants. `max_seq_length` is used as the vLLM context length below;
# `dtype` and `load_in_4bit` are retained from the fine-tuning setup and are not used by
# the AWQ-quantized vLLM path in this handler.
max_seq_length = 2048
dtype = None
load_in_4bit = True

instruction_message = """You are tasked with extracting relevant information from a given text representing a candidate profile or job requirement. Your goal is to insert specific tags around relevant pieces of information without altering the original text's formatting, punctuation, or word order. |
|
**Tags to use:** |
|
- `<jobtitle>`: Job title of the candidate or job requirement. |
|
- `<skill>`: Specific skills or competencies mentioned. |
|
- `<minyear>`: Minimum number of years of experience. |
|
- `<maxyear>`: Maximum number of years of experience. |
|
- `<language>`: Languages spoken or required. |
|
- `<location>`: Geographical location related to the candidate or job. |
|
- `<degree>`: Academic qualifications or degrees. |
|
- `<certification>`: Professional certifications or accreditations. |
|
- `<institution_education>`: Names of educational institutions related to the degree. |
|
- `<institution_company>`: Names of companies related to employment history. |
|
- `<proximity>`: Distance or location-related preferences (e.g., "within 30 miles"). |
|
- `<industry>`: Specific industry or sector experience. |
|
**Guidelines:** |
|
1. **Preserve Original Text**: Do not change the original text's formatting, punctuation, or word order. The output should be identical to the input except for the addition of tags. |
|
2. **Tagging**: Enclose each relevant piece of information with the appropriate tag. Do not include commas or other punctuation inside the tags unless they are part of the tagged item. |
|
3. **Experience Years**: Use `<minyear>` as default if only a single year is given, relating to the experience. |
|
4. **IMPORTANT** You **must not** include any tag that is not in the provided list. You can only tag using the ones provided. |
|
Your role is to accurately tag the text while preserving its original appearance. |
|
""" |
|
# Fixed system prompt plus one worked example (a one-shot prompt); each incoming request
# is appended to a copy of this history before generation.
conversation_history = [
    {
        "role": "system",
        "content": instruction_message,
    },
    {
        "role": "user",
        "content": "Financial Analysts located within 50 miles of London, with skills in planning, budgeting, and a Master's Degree in Finance. Previous employment at Goldman Sachs or degree from MIT is a bonus.",
    },
    {
        "role": "assistant",
        "content": "<jobtitle>Financial Analysts</jobtitle> located within <proximity>50 miles</proximity> of <location>London</location>, with skills in <skill>planning</skill>, <skill>budgeting</skill>, and a <degree>Master's Degree in Finance</degree>. Previous employment at <institution_company>Goldman Sachs</institution_company> or degree from <institution_education>MIT</institution_education> is a bonus.",
    },
]


class EndpointHandler:

    def __init__(self, path=""):
        """
        Initializes the EndpointHandler with a specified model and tokenizer.

        Args:
            path (str): The local path or identifier for the model to load.
                This path should contain both the model and tokenizer files.
        """
        # The model is expected to be AWQ-quantized; max_model_len mirrors max_seq_length above.
        self.llm = LLM(
            model=path,
            max_model_len=max_seq_length,
            quantization="awq",
            gpu_memory_utilization=0.8,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data) -> str:
        """
        Builds the prompt from the fixed conversation history plus the incoming text
        and passes it to the language model for generation.

        Args:
            data (dict): A dictionary containing the input data with the key `user_string`,
                a string holding the candidate-profile or job-requirement text to be tagged.

        Returns:
            str: The generated output from the model, i.e. the input text with tags inserted.
        """
        user_string = data.pop("user_string")
        user_example = {"role": "user", "content": user_string}

        # Append the new request to a copy of the fixed system prompt and one-shot example.
        conversation_input = conversation_history.copy()
        conversation_input.append(user_example)
        model_input = self.tokenizer.apply_chat_template(
            conversation_input, tokenize=False, add_generation_prompt=True
        )

        # Low temperature keeps the tagging close to deterministic; max_tokens bounds the response length.
        sampling_params = SamplingParams(temperature=0.1, min_p=0.6, max_tokens=1024)

        output = self.llm.generate(model_input, sampling_params)
        generated_text = output[0].outputs[0].text

        return generated_text
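

# Minimal local smoke test (a sketch, not part of the deployed handler): the model path and the
# sample sentence below are illustrative assumptions, not values taken from this repository.
if __name__ == "__main__":
    handler = EndpointHandler(path="path/to/awq-quantized-model")
    sample = {"user_string": "Senior Java Developer with 5 years of experience, based in Berlin."}
    print(handler(sample))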