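"""Text-generation helpers built around a Hugging Face `pipeline` for the
"pankaj9075rawat/chaiAI-Harthor" chat model. Exposes `get_init_AI_response`
and `get_response` for generating assistant turns from a message history."""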
import torch
from transformers import pipeline


# print("entered llama.py")
model_id = "pankaj9075rawat/chaiAI-Harthor"
pipeline = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    # device="cuda",
    device_map="auto",
    # token=access_token,
)

# To load from a locally saved copy instead, point `model` at a local directory:
# load_directory = os.path.join(os.path.dirname(__file__), "local_model_directory")
# pipeline = pipeline("text-generation", model=load_directory,
#                     model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")

# Stop generation on either the tokenizer's EOS token or the end-of-turn
# token used by Llama-3-style chat templates ("<|eot_id|>").
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]


def get_init_AI_response(
    message_history=None, max_tokens=128, temperature=1.1, top_p=0.9
):
    """Generate the opening assistant message from an initial message history
    (typically just the system prompt).

    Returns the generated text and the updated message history.
    """
    system_prompt = message_history if message_history is not None else []
    # Render the chat messages into the model's prompt format.
    prompt = pipeline.tokenizer.apply_chat_template(
        system_prompt, tokenize=False, add_generation_prompt=True
    )
    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # The pipeline returns the prompt plus the completion; keep only the completion.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, system_prompt + [{"role": "assistant", "content": response}]


def get_response(
    query, message_history=None, max_tokens=128, temperature=1.1, top_p=0.9
):
    """Generate the assistant's reply to `query`, given the prior message history.

    Returns the generated text and the updated message history.
    """
    history = message_history if message_history is not None else []
    user_prompt = history + [{"role": "user", "content": query}]
    # Render the chat messages into the model's prompt format.
    prompt = pipeline.tokenizer.apply_chat_template(
        user_prompt, tokenize=False, add_generation_prompt=True
    )
    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # The pipeline returns the prompt plus the completion; keep only the completion.
    response = outputs[0]["generated_text"][len(prompt):]
    return response, user_prompt + [{"role": "assistant", "content": response}]
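

# Minimal usage sketch (an illustration, not part of the deployed code): it
# assumes the model above has loaded successfully, and the system-prompt text
# below is only a hypothetical placeholder.
if __name__ == "__main__":
    history = [
        {"role": "system", "content": "You are Harthor, a friendly roleplay companion."}
    ]

    # Let the model open the conversation from the system prompt alone.
    opening, history = get_init_AI_response(history)
    print("AI:", opening)

    # Continue the conversation with a user turn.
    reply, history = get_response("Hi, who are you?", history)
    print("AI:", reply)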