# chat_model_api_v1/src/app/llamaLLM.py
import torch
from transformers import pipeline

model_id = "pankaj9075rawat/chaiAI-Harthor"

# Build a text-generation pipeline for the fine-tuned model. bfloat16 halves
# memory use, and device_map="auto" places the model on the available GPU(s).
pipe = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
    # token=access_token,  # only needed for gated/private models
)
# Alternative: load from a locally saved copy of the model (requires `import os`).
# load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
# pipe = pipeline(
#     "text-generation",
#     model=load_directory,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     device_map="auto",
#     # token=access_token,
# )

# Stop generation at either the tokenizer's EOS token or the Llama-3
# end-of-turn token "<|eot_id|>".
terminators = [
    pipe.tokenizer.eos_token_id,
    pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

def get_init_AI_response(
    message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Generate the opening assistant message from the initial (system) message history."""
    system_prompt = message_history
    prompt = pipe.tokenizer.apply_chat_template(
        system_prompt, tokenize=False, add_generation_prompt=True
    )
    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Strip the prompt prefix so only the newly generated text is returned.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, system_prompt + [{"role": "assistant", "content": response}]

def get_response(
    query, message_history=[], max_tokens=128, temperature=1.1, top_p=0.9
):
    """Append the user query to the history, generate a reply, and return both."""
    user_prompt = message_history + [{"role": "user", "content": query}]
    prompt = pipe.tokenizer.apply_chat_template(
        user_prompt, tokenize=False, add_generation_prompt=True
    )
    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Strip the prompt prefix so only the newly generated text is returned.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, user_prompt + [{"role": "assistant", "content": response}]