Spaces:

mliutdchra
/

HRA_QA_BOT_REST_API_v1

Sleeping

File size: 2,905 Bytes

import gradio
import os
import json
import torch
import numpy as np
from utils import ModelWrapper
from sklearn.metrics.pairwise import cosine_similarity
# Instantiate the project's ModelWrapper once, at import time, so every
# request shares it. The inference function below relies on its
# `get_embeddings`, `tokenizer` and `model_qa` members.
model_loader = ModelWrapper()

def _rebuild_document_embeddings():
    """Embed every file in ./documents and save them to ./vectors/embeddings.json.

    This is the offline indexing step. It is NOT called during inference
    (the original kept it disabled behind ``if 0:``); run it manually when
    the document set changes.
    """
    document_embeddings = {}
    for file_name in os.listdir("./documents"):
        with open("./documents/" + file_name, "r", encoding="utf-8") as doc_file:
            text = doc_file.read()

        # Documents are embedded with flag 1 (questions use 0); the meaning
        # of the flag is defined by utils.ModelWrapper, not visible here.
        document_embeddings[file_name] = model_loader.get_embeddings(text, 1).tolist()

    # Save the embeddings of all the documents as the "vector database".
    with open("./vectors/embeddings.json", "w") as outfile:
        outfile.write(json.dumps(document_embeddings, indent=4))


def my_inference_function(question):
    """Answer ``question`` using the stored document most similar to it.

    The question is embedded, compared against the precomputed document
    embeddings in ./vectors/embeddings.json, and, if the best match is
    similar enough, the extractive QA model is run over that document.

    Args:
        question: The user's question as plain text.

    Returns:
        A dict with the keys ``"answer"``, ``"most_relevant_document"`` and
        ``"cosine_similarity"`` (the similarity is returned as a string).
        When no document reaches the similarity threshold, the answer is an
        apology message, the document is ``"None"`` and the similarity is
        ``"-1"``.
    """
    # Minimum similarity for a document to be considered relevant.
    similarity_threshold = 0.35

    question_embeddings = model_loader.get_embeddings(question, 0)

    # Load the document embeddings; the context manager closes the file
    # (the handle used to be leaked on every request).
    # Will be replaced with a vector database later on.
    with open("./vectors/embeddings.json", "r") as embeddings_file:
        document_embeddings = json.load(embeddings_file)

    # Linear search for the most relevant document.
    max_similarity = -1
    most_relevant_document = None
    for document in document_embeddings:
        # NOTE(review): sklearn's cosine_similarity returns a 2-D array, so
        # this value (and the string returned below) is array-shaped, e.g.
        # "[[0.5]]" -- confirm whether API clients rely on that format
        # before converting it to a plain float.
        cur_similarity = cosine_similarity(question_embeddings, document_embeddings[document])
        if cur_similarity > max_similarity:
            most_relevant_document = document
            max_similarity = cur_similarity

    # Guard clause: nothing relevant enough (also covers an empty index).
    if max_similarity < similarity_threshold:
        return {
            "answer": "Sorry we can't find the relevant document",
            "most_relevant_document": "None",
            "cosine_similarity": str(-1),
        }

    with open("./documents/" + most_relevant_document, "r", encoding="utf-8") as doc_file:
        context = doc_file.read()

    inputs = model_loader.tokenizer(question, context, return_tensors="pt")
    with torch.no_grad():
        outputs = model_loader.model_qa(**inputs)

    # Most likely start/end token positions of the answer span.
    answer_start_index = outputs.start_logits.argmax()
    answer_end_index = outputs.end_logits.argmax()

    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    predict_answer = model_loader.tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

    # decode() yields a string, so the old `is None` check never fired.
    # An empty span (e.g. end index before start index) decodes to "",
    # which must trigger the fallback message too.
    if predict_answer is None or not predict_answer.strip():
        predict_answer = "I can't answer your question right now. I am evolving ..."

    return {
        "answer": predict_answer,
        "most_relevant_document": most_relevant_document,
        "cosine_similarity": str(max_similarity),
    }

# Wire the inference function into a Gradio interface: a single text input,
# with the returned dict rendered as JSON.
gradio_interface = gradio.Interface(
    fn=my_inference_function,
    inputs="text",
    outputs="json",
    title="HRA Leadership QA Bot",
    examples=[
        "Where did Robert Kauffman graduate?",
        "What's the position of Fred Danback?",
    ],
)

# Start serving as soon as the script is executed.
gradio_interface.launch()