import torch
from transformers import pipeline


# Model checkpoint to load from the Hugging Face Hub.
model_id = "pankaj9075rawat/chaiAI-Harthor"

# Build a text-generation pipeline in bfloat16 and let Accelerate place the model
# on the available device(s). Named `pipe` to avoid shadowing the imported
# `pipeline` factory function.
pipe = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Stop generation at either the tokenizer's EOS token or the "<|eot_id|>"
# end-of-turn token used by Llama-3-style chat templates.
terminators = [
    pipe.tokenizer.eos_token_id,
    pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def get_init_AI_response(
    message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Generate the assistant's opening message from an initial history
    (typically just the system message)."""
    system_prompt = message_history

    # Render the chat history into a single prompt string using the model's
    # chat template, ending with the assistant generation prompt.
    prompt = pipe.tokenizer.apply_chat_template(
        system_prompt, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # The pipeline output contains the prompt followed by the completion;
    # keep only the newly generated text.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, system_prompt + [{"role": "assistant", "content": response}]


def get_response(
    query, message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Append a user turn to the history and generate the assistant's reply."""
    user_prompt = message_history + [{"role": "user", "content": query}]

    prompt = pipe.tokenizer.apply_chat_template(
        user_prompt, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Keep only the newly generated text after the rendered prompt.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, user_prompt + [{"role": "assistant", "content": response}]
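

# Illustrative usage sketch, not part of the original script: the system persona
# text below is a made-up placeholder, and running this requires downloading the
# model weights from the Hub.
if __name__ == "__main__":
    history = [
        {"role": "system", "content": "You are a friendly conversational companion."}
    ]

    # Let the model open the conversation from the system message alone.
    opening, history = get_init_AI_response(history)
    print("assistant:", opening)

    # Each call returns the reply together with the updated history, which can be
    # fed straight into the next call to keep the conversation going.
    reply, history = get_response("Hi, how are you today?", history)
    print("assistant:", reply)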