import torch
from transformers import RobertaTokenizerFast
from utils import RobertaConfig
from model import RobertaForQuestionAnswering
from safetensors.torch import load_file
from datasets import load_dataset
from pprintpp import pprint

# import os

# # Set the HF_HOME environment variable (optional cache redirect)
# os.environ["HF_HOME"] = "/tmp/hf_cache"

class InferenceModel:
    """
        Quick inference function that works with the models we have trained!
    """

    def __init__(self, path_to_weights, huggingface_model=True):
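        """
        Build the config, tokenizer, and QA model, then load trained weights
        from a safetensors checkpoint and switch the model to eval mode.
        """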

        ### Init Config with either Huggingface Backbone or our own ###
        self.config = RobertaConfig(pretrained_backbone="pretrained_huggingface" if huggingface_model else "random")

        ### Load Tokenizer ###
        self.tokenizer = RobertaTokenizerFast.from_pretrained(self.config.hf_model_name)

        ### Load Model ###
        self.model = RobertaForQuestionAnswering(self.config)

        weights = load_file(path_to_weights)
        self.model.load_state_dict(weights)

        self.model.eval()

    def inference_model(self,
                        question,
                        context):
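        """
        Run extractive QA: tokenize the question/context pair, predict start and end
        token logits, and decode the highest-scoring answer span back to text.
        """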
        ### Tokenize Text ###
        inputs = self.tokenizer(text=question,
                                text_pair=context,
                                max_length=self.config.context_length,
                                truncation="only_second",
                                return_tensors="pt")

        ### Pass through Model ###
        with torch.no_grad():
            start_token_logits, end_token_logits = self.model(**inputs)

        ### Grab Start and End Token Idx ###
        start_token_idx = start_token_logits.squeeze().argmax().item()
        end_token_idx = end_token_logits.squeeze().argmax().item()
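        # NOTE: start and end are argmax'd independently, so in rare cases
        # end_token_idx can land before start_token_idx and the decoded answer is empty.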


        ### Slice Tokens and then Decode with Tokenizer (+1 because slice is not right inclusive) ###
        tokens = inputs["input_ids"].squeeze()[start_token_idx:end_token_idx + 1]
        answer = self.tokenizer.decode(tokens, skip_special_tokens=True).strip()

        prediction = {"start_token_idx": start_token_idx,
                      "end_token_idx": end_token_idx,
                      "answer": answer}

        return prediction

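
# A minimal test sketch: the weights path "qa_model.safetensors" and the "squad_v2"
# dataset choice are assumptions; point these at your own checkpoint and data.
if __name__ == "__main__":
    inference = InferenceModel(path_to_weights="qa_model.safetensors")

    # Grab one validation example from SQuAD v2 and run it through the model
    squad = load_dataset("squad_v2", split="validation")
    sample = squad[0]

    prediction = inference.inference_model(question=sample["question"],
                                           context=sample["context"])
    pprint(prediction)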