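# Chat-inference helpers for the pankaj9075rawat/chaiAI-Harthor model, built on
# the Hugging Face Transformers text-generation pipeline.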
import torch
from transformers import pipeline

# print("entered llama.py")

model_id = "pankaj9075rawat/chaiAI-Harthor"

# Load the model as a text-generation pipeline in bfloat16, letting
# device_map="auto" place it on the available device(s).
pipeline = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    # device="cuda",
    device_map="auto",
    # token=access_token,
)

# Alternative: load the model from a local directory instead of the Hub.
# load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
# pipeline = pipeline(
#     "text-generation",
#     model=load_directory,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     # device="cuda",
#     device_map="auto",
#     # token=access_token
# )

# Stop generation at the model's EOS token or at the "<|eot_id|>" end-of-turn token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def get_init_AI_response(
    message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Generate the opening assistant message from the initial (system) message history."""
    system_prompt = message_history
    prompt = pipeline.tokenizer.apply_chat_template(
        system_prompt, tokenize=False, add_generation_prompt=True
    )
    # print("prompt before conversion: ", system_prompt)
    # print("prompt after conversion: ", prompt)
    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Strip the echoed prompt so only the newly generated text is returned.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, system_prompt + [{"role": "assistant", "content": response}]


def get_response(
    query, message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Append the user query to the history and generate the next assistant reply."""
    user_prompt = message_history + [{"role": "user", "content": query}]
    prompt = pipeline.tokenizer.apply_chat_template(
        user_prompt, tokenize=False, add_generation_prompt=True
    )
    # print("prompt before conversion: ", user_prompt)
    # print("prompt after conversion: ", prompt)
    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Strip the echoed prompt so only the newly generated text is returned.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, user_prompt + [{"role": "assistant", "content": response}]
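

# Illustrative usage sketch (not part of the original module): the system prompt
# text and the example conversation below are assumptions for demonstration only.
if __name__ == "__main__":
    history = [
        {"role": "system", "content": "You are Harthor. Stay in character."}  # assumed prompt
    ]
    greeting, history = get_init_AI_response(history)
    print("assistant:", greeting)
    reply, history = get_response("Hi, who are you?", history)
    print("assistant:", reply)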