File size: 4,369 Bytes
3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 3fb88a6 8e44dd8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import pandas as pd
import pickle
import random
from sentence_transformers import SentenceTransformer
from utils import (
encode,
cosine_sim,
top_candidates,
candidates_reranking,
intent_classification,
)
from collections import deque
from transformers import pipeline
import torch
from transformers import AutoTokenizer
from dialog_tag import DialogTag
# This module implements the main functions of the retrieval chat bot.
# Canned replies; generate_response() picks one at random (random.choice)
# when the best candidate's score is below its threshold.
# The strings are emitted to the user verbatim -- do not "fix" their spacing.
low_scoring_list = [
    "What does it mean?",
    "You have two strikes. Three strikes and you’ re out. It’ s a sports metaphor. Explain again!",
    "Again, urban slang. In which, I believe I’ m gaining remarkable fluency. So, could you repeat?",
    "I’m confused.",
    "I can’t comment without violating our agreement that I don’ t criticize you.",
    "Oh!",
    "I need to use the restroom.",
    "Move. Move. Move!",
    "I was going to mention it at the time, but then I thought, some day maybe...",
    "Well...",
    "Apparently... I have no idea!?",
    "I’m not sure...",
    "Nothing. I say nothing.",
    "Well, my friend. Focus and repeat!",
]
class ChatBot:
    """Retrieval chat bot that answers with lines taken from ``self.scripts``.

    Typical use: create an instance, call :meth:`load` once, then call
    :meth:`generate_response` for every user utterance.
    """

    def __init__(self, *, score_threshold=0.9, top_k=10, history_size=5):
        """Create an empty bot; no data or models are loaded here.

        Args:
            score_threshold: minimum value of the best candidate's score for
                the bot to answer from the scripts; below it a random entry
                of ``low_scoring_list`` is returned instead.
            top_k: number of candidates requested from ``top_candidates``.
            history_size: maximum number of entries kept in
                ``conversation_history`` (older entries are dropped).
        """
        self.score_threshold = score_threshold
        self.top_k = top_k
        self.vect_data = []
        self.scripts = []
        self.conversation_history = deque([], maxlen=history_size)
        self.tag_model = None
        self.ranking_model = None
        self.reranking_model = None
        self.device = None
        # Not used by this class; kept so existing readers of the attribute
        # keep working.
        self.tokenizer = None
        # Populated by load(); declared here so the attribute always exists.
        self.tokenizer_reranking = None

    def load(self):
        """Load all datasets and models used by the chat bot.

        Data is read from the ``data`` folder; models are loaded by name
        (Hugging Face identifiers). Must be called before
        :meth:`generate_response`.
        """
        # NOTE: unpickling can execute arbitrary code -- only point these
        # paths at trusted, locally produced files.
        with open("data/scripts_vectors.pkl", "rb") as fp:
            self.vect_data = pickle.load(fp)
        self.scripts = pd.read_pickle("data/scripts.pkl")

        self.tag_model = DialogTag("distilbert-base-uncased")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Alternatives tried for ranking: sentence-transformers/LaBSE,
        # sentence-transformers/all-mpnet-base-v2.
        self.ranking_model = SentenceTransformer(
            "Shakhovak/chatbot_sentence-transformer"
        )

        self.tokenizer_reranking = AutoTokenizer.from_pretrained("bert-base-uncased")
        self.reranking_model = pipeline(
            model="Shakhovak/RerankerModel_chat_bot",
            device=self.device,
            tokenizer=self.tokenizer_reranking,
        )

    def generate_response(self, utterance: str) -> str:
        """Identify candidate answers for ``utterance``, rank them, return one.

        The utterance and the returned answer are appended to
        ``conversation_history``; on a low-scoring match the history is
        cleared first.

        Args:
            utterance: the user's message.

        Returns:
            The chosen answer: a value of the ``"answer"`` column of
            ``self.scripts`` or an entry of ``low_scoring_list``.

        Raises:
            RuntimeError: if the models have not been loaded yet.
        """
        if (
            self.tag_model is None
            or self.ranking_model is None
            or self.reranking_model is None
        ):
            raise RuntimeError(
                "ChatBot.load() must be called before generate_response()"
            )

        intent = intent_classification(utterance, utterance, self.tag_model)
        query_encoding = encode(
            texts=utterance,
            intent=intent,
            model=self.ranking_model,
            contexts=self.conversation_history,
        )
        bot_cosine_scores = cosine_sim(
            self.vect_data,
            query_encoding,
        )
        top_scores, top_indexes = top_candidates(
            bot_cosine_scores,
            intent=intent,
            initial_data=self.scripts,
            top=self.top_k,
        )
        print(top_scores)

        # len() rather than truthiness: top_scores may be an array-like whose
        # truth value is ambiguous. No candidates at all is treated the same
        # as a low-scoring match.
        if len(top_scores) == 0 or top_scores[0] < self.score_threshold:
            answer = random.choice(low_scoring_list)
            self.conversation_history.clear()
        else:
            # Test candidates and collect the ones with label 0 in a dict.
            # Keys are used below as positional indexes into self.scripts,
            # values as the ranking key (smallest wins).
            # NOTE(review): confirm that "smallest value first" is the
            # intended order for the reranker's output.
            reranked_dict = candidates_reranking(
                top_indexes,
                self.conversation_history,
                utterance,
                self.scripts,
                self.reranking_model,
            )
            if reranked_dict:
                # Same pick as sorting ascending by value and taking the
                # first key (ties resolve to the earliest inserted key).
                best_index = min(reranked_dict, key=reranked_dict.get)
                answer = self.scripts.iloc[best_index]["answer"]
                # Debug: show what the pre-reranking top candidate was.
                print(self.scripts.iloc[top_indexes[0]]["answer"])
            else:
                # Nothing survived reranking: keep the initial top 1.
                answer = self.scripts.iloc[top_indexes[0]]["answer"]

        self.conversation_history.append(utterance)
        self.conversation_history.append(answer)
        return answer
# katya = ChatBot()
# katya.load()
# katya.generate_response("hi man!")
|