Daxtra committed on
Commit
6ab9166
1 Parent(s): c2418d9

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +89 -0
handler.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from vllm import LLM, SamplingParams
2
+ from transformers import AutoTokenizer
3
+
4
+ max_seq_length = 2048
5
+ dtype = None
6
+ load_in_4bit = True
7
+
8
+ instruction_message = """You are tasked with extracting relevant information from a given text representing a candidate profile or job requirement. Your goal is to insert specific tags around relevant pieces of information without altering the original text's formatting, punctuation, or word order.
9
+ **Tags to use:**
10
+ - `<jobtitle>`: Job title of the candidate or job requirement.
11
+ - `<skill>`: Specific skills or competencies mentioned.
12
+ - `<minyear>`: Minimum number of years of experience.
13
+ - `<maxyear>`: Maximum number of years of experience.
14
+ - `<language>`: Languages spoken or required.
15
+ - `<location>`: Geographical location related to the candidate or job.
16
+ - `<degree>`: Academic qualifications or degrees.
17
+ - `<certification>`: Professional certifications or accreditations.
18
+ - `<institution_education>`: Names of educational institutions related to the degree.
19
+ - `<institution_company>`: Names of companies related to employment history.
20
+ - `<proximity>`: Distance or location-related preferences (e.g., "within 30 miles").
21
+ - `<industry>`: Specific industry or sector experience.
22
+ **Guidelines:**
23
+ 1. **Preserve Original Text**: Do not change the original text's formatting, punctuation, or word order. The output should be identical to the input except for the addition of tags.
24
+ 2. **Tagging**: Enclose each relevant piece of information with the appropriate tag. Do not include commas or other punctuation inside the tags unless they are part of the tagged item.
25
+ 3. **Experience Years**: Use `<minyear>` as default if only a single year is given, relating to the experience.
26
+ 4. **IMPORTANT** You **must not** include any tag that is not in the provided list. You can only tag using the ones provided.
27
+ Your role is to accurately tag the text while preserving its original appearance.
28
+ """
29
+
30
+ conversation_history = [
31
+ {
32
+ "role": "system",
33
+ "content": f"{instruction_message}"
34
+ },
35
+ {
36
+ "role": "user",
37
+ "content": "Financial Analysts located within 50 miles of London, with skills in planning, budgeting, and a Master's Degree in Finance. Previous employment at Goldman Sachs or degree from MIT is a bonus."
38
+ },
39
+ {
40
+ "role": "assistant",
41
+ "content": "<jobtitle>Financial Analysts</jobtitle> located within <proximity>50 miles</proximity> of <location>London</location>, with skills in <skill>planning</skill>, <skill>budgeting</skill>, and a <degree>Master's Degree in Finance</degree>. Previous employment at <institution_company>Goldman Sachs</institution_company> or degree from <institution_education>MIT</institution_education> is a bonus."
42
+ }
43
+ ]
44
+
45
+ class EndpointHandler:
46
+
47
+ def __init__(self, path=""):
48
+ """
49
+ Initializes the EndpointHandler with a specified model and tokenizer.
50
+
51
+ Args:
52
+ path (str): The local path or identifier for the model to load.
53
+ This path should contain both the model and tokenizer files.
54
+ """
55
+ self.llm = LLM(model=path, max_model_len=2048, quantization='awq', gpu_memory_utilization=0.8)
56
+ self.tokenizer = AutoTokenizer.from_pretrained(path)
57
+
58
+
59
+ def __call__(self, data) -> str:
60
+ """
61
+ Processes the input data by generating a formatted conversation history string
62
+ and passing it to the language model for generation.
63
+
64
+ Args:
65
+ data (dict): A dictionary containing the input data with the key `inputs`,
66
+ which is a list representing a conversation history.
67
+ Each conversation history item should be a dictionary with 'role'
68
+ (e.g., "assistant" or "user") and 'content' (the message text).
69
+
70
+ Returns:
71
+ str: The generated output from the model after processing the conversation history.
72
+ """
73
+
74
+ # Get inputs and preprocess
75
+ user_string = data.pop("user_string")
76
+ user_example = {"role": "user", "content": user_string}
77
+
78
+ conversation_input = conversation_history.copy()
79
+ conversation_input.append(user_example)
80
+ model_input = self.tokenizer.apply_chat_template(conversation_input, tokenize=False, add_generation_prompt=True)
81
+
82
+ # Set sampling parameters
83
+ sampling_params = SamplingParams(temperature=0.1, min_p=0.6, max_tokens=1024)
84
+
85
+ # Generate output
86
+ output = self.llm.generate(model_input, sampling_params)
87
+ generated_text = output[0].outputs[0].text
88
+
89
+ return generated_text