import torch
from transformers import pipeline

# print("entered llama.py")

# Hugging Face model to load for text generation.
model_id = "pankaj9075rawat/chaiAI-Harthor"

# Build the generation pipeline once at import time, sharding across available
# devices and loading bfloat16 weights to reduce memory usage.
pipe = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    # device="cuda",
    device_map="auto",
    # token=access_token,
)

# Alternative: load the model from a local directory instead of the Hub.
# load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
# pipe = pipeline(
#     "text-generation",
#     model=load_directory,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     # device="cuda",
#     device_map="auto",
#     # token=access_token,
# )

# Stop generation at either the model's EOS token or the "<|eot_id|>" end-of-turn token.
terminators = [
    pipe.tokenizer.eos_token_id,
    pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def get_init_AI_response(
    message_history=[],
    max_tokens=128,
    temperature=1.1,
    top_p=0.9,
):
    """Generate the model's opening message from the initial (system) history.

    Returns the generated text and the history with the assistant reply appended.
    """
    system_prompt = message_history
    prompt = pipe.tokenizer.apply_chat_template(
        system_prompt,
        tokenize=False,
        add_generation_prompt=True,
    )
    # print("prompt before conversion: ", system_prompt)
    # print("prompt after conversion: ", prompt)
    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # The pipeline echoes the prompt before the completion; keep only the completion.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, system_prompt + [{"role": "assistant", "content": response}]


def get_response(
    query,
    message_history=[],
    max_tokens=128,
    temperature=1.1,
    top_p=0.9,
):
    """Generate a reply to `query` given the conversation so far.

    Returns the generated text and the history with the user turn and the
    assistant reply appended.
    """
    user_prompt = message_history + [{"role": "user", "content": query}]
    prompt = pipe.tokenizer.apply_chat_template(
        user_prompt,
        tokenize=False,
        add_generation_prompt=True,
    )
    # print("prompt before conversion: ", user_prompt)
    # print("prompt after conversion: ", prompt)
    outputs = pipe(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Strip the echoed prompt so only the new completion is returned.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, user_prompt + [{"role": "assistant", "content": response}]
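

# Minimal usage sketch of the two helpers above. This block is illustrative:
# the system prompt text is a made-up example, and real prompts depend on the
# application that imports this module.
if __name__ == "__main__":
    # Seed the conversation with a system message, then let the model open.
    history = [
        {"role": "system", "content": "You are a friendly roleplay character."},
    ]
    opening, history = get_init_AI_response(history)
    print("assistant:", opening)

    # Each call returns the reply plus the updated history, which is passed
    # back in on the next turn to carry the conversation state.
    reply, history = get_response("Hi! Who are you?", history)
    print("assistant:", reply)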