from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
import torch
from peft import PeftConfig, PeftModel

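# Overview: load the Mistral-7B base model in 4-bit, apply the
# TurtleLiu/mistral7b_psychology_bot LoRA adapter, merge the adapter weights,
# and serve the merged model through a Gradio ChatInterface.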
# Loading PEFT model
PEFT_MODEL = "TurtleLiu/mistral7b_psychology_bot"

config = PeftConfig.from_pretrained(PEFT_MODEL)

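# 4-bit NF4 quantization (bitsandbytes) keeps the 7B base model small enough
# for a single GPU; compute runs in bfloat16 on the dequantized weights.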
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

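# Attach the LoRA adapter to the quantized base model, then merge the adapter
# weights into the base so the result behaves like a plain transformers model.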
model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)
model = model.merge_and_unload()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)
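# Mistral's tokenizer defines no pad token, so reuse EOS for padding.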
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Generate response

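# Mistral-Instruct prompt format: every past turn is wrapped as
# "[INST] user [/INST] reply</s>" and the new message goes in a final [INST] block, e.g.
#   format_prompt("How are you?", [("Hi", "Hello!")])
#   -> "<s>[INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]"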
def format_prompt(message, history):
  prompt = "<s>"
  for user_prompt, bot_response in history:
    prompt += f"[INST] {user_prompt} [/INST]"
    prompt += f" {bot_response}</s> "
  prompt += f"[INST] {message} [/INST]"
  return prompt

# Text-generation pipeline used by the Gradio callback. Only max_new_tokens is
# set (max_length would conflict with it), and return_full_text=False makes the
# pipeline return just the completion instead of echoing the prompt.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, do_sample=True,
    max_new_tokens=1024,
    temperature=0.9,
    top_k=50,
    top_p=0.95,
    num_return_sequences=1,
    return_full_text=False)

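# Chat callback for gr.ChatInterface: takes the latest message plus the running
# (user, bot) history, builds a single prompt, and returns the model's reply.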
def generate_response(message, history):
    prompt = format_prompt(message, history)
    result = pipe(prompt)[0]["generated_text"]
    return result

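# The triple-quoted block below is an unused alternative that calls
# model.generate directly instead of the pipeline; it is kept for reference.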
'''
def generate_response(prompt, history, temperature=0.9, max_new_tokens=1024, top_p=0.95, repetition_penalty=1.0, **kwargs,):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    # Note: this variant would also need `from transformers import GenerationConfig`.
    runtimeFlag = "cuda:0"
    formatted_prompt = format_prompt(f"{prompt}", history)
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to(runtimeFlag)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        **kwargs,
    )
    generation_output = model.generate(
        **inputs,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_new_tokens,
    )
    # Decode only the newly generated tokens and return the reply text.
    new_tokens = generation_output.sequences[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
'''

# UI design
examples = [
    "Patient is feeling stressed due to work and has trouble sleeping.",
    "Client is dealing with relationship issues and is seeking advice on communication strategies.",
    "Individual has recently experienced a loss and is having difficulty coping with grief.",
]

gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    title="Psychological Assistant: Expert in Assessment and Strategic Planning",
    description="Enter counseling notes to generate an assessment and plan.",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False, debug=True)
    

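# The block below is an alternative implementation of the same app that streams
# completions from the hosted Inference API via huggingface_hub.InferenceClient;
# it is also commented out and kept for reference.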
'''
from huggingface_hub import InferenceClient
import gradio as gr

client = InferenceClient(
    "TurtleLiu/mistral7b_psychology_bot"
)


def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] As a psychology counselor assistant, provide an assessment and plan for the following counseling notes. Please present a summary, don't make it so long. Present in lines.: {message} [/INST]"
    return prompt

    
def generate(
    prompt, history, temperature=0.9, max_new_tokens=1024, top_p=0.95, repetition_penalty=1.0,
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(f"{prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output


examples=[
    ["Patient is feeling stressed due to work and has trouble sleeping.", None, None, None, None, None],
    ["Client is dealing with relationship issues and is seeking advice on communication strategies.", None, None, None, None, None],
    ["Individual has recently experienced a loss and is having difficulty coping with grief.", None, None, None, None, None],
]

gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    title="Psychological Assistant: Expert in Assessment and Strategic Planning",
    description="Enter counseling notes to generate an assessment and plan.",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False, debug=True)
'''