Spaces:
Running
Running
File size: 6,044 Bytes
b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 76c8687 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 4b2da06 b508e95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# import gradio as gr
# import torch
# import transformers
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# from peft import PeftConfig, PeftModel
# import warnings
# from threading import Thread
# warnings.filterwarnings("ignore")
# PEFT_MODEL = "givyboy/phi-2-finetuned-mental-health-conversational"
# SYSTEM_PROMPT = """Answer the following question truthfully.
# If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
# If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""
# USER_PROMPT = lambda x: f"""<HUMAN>: {x}\n<ASSISTANT>: """
# ADD_RESPONSE = lambda x, y: f"""<HUMAN>: {x}\n<ASSISTANT>: {y}"""
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# bnb_config = BitsAndBytesConfig(
# load_in_4bit=True,
# bnb_4bit_quant_type="nf4",
# bnb_4bit_use_double_quant=True,
# bnb_4bit_compute_dtype=torch.float16,
# )
# config = PeftConfig.from_pretrained(PEFT_MODEL)
# peft_base_model = AutoModelForCausalLM.from_pretrained(
# config.base_model_name_or_path,
# return_dict=True,
# # quantization_config=bnb_config,
# device_map="auto",
# trust_remote_code=True,
# offload_folder="offload/",
# offload_state_dict=True,
# )
# peft_model = PeftModel.from_pretrained(
# peft_base_model,
# PEFT_MODEL,
# offload_folder="offload/",
# offload_state_dict=True,
# )
# peft_model = peft_model.to(DEVICE)
# peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# peft_tokenizer.pad_token = peft_tokenizer.eos_token
# pipeline = transformers.pipeline(
# "text-generation",
# model=peft_model,
# tokenizer=peft_tokenizer,
# torch_dtype=torch.bfloat16,
# trust_remote_code=True,
# device_map="auto",
# )
# # def format_message(message: str, history: list[str], memory_limit: int = 3) -> str:
# # if len(history) > memory_limit:
# # history = history[-memory_limit:]
# # if len(history) == 0:
# # return f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
# # formatted_message = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"
# # for msg, ans in history[1:]:
# # formatted_message += f"\n{ADD_RESPONSE(msg, ans)}"
# # formatted_message += f"\n{USER_PROMPT(message)}"
# # return formatted_message
# # def get_model_response(message: str, history: list[str]) -> str:
# # formatted_message = format_message(message, history)
# # sequences = pipeline(
# # formatted_message,
# # do_sample=True,
# # top_k=10,
# # num_return_sequences=1,
# # eos_token_id=peft_tokenizer.eos_token_id,
# # max_length=600,
# # )[0]
# # print(sequences["generated_text"])
# # output = sequences["generated_text"].split("<ASSISTANT>:")[-1].strip()
# # # print(f"Response: {output}")
# # return output
# start_message = ""
# def user(message, history):
# # Append the user's message to the conversation history
# return "", history + [[message, ""]]
# def chat(message, history):
# chat_history = []
# for item in history:
# chat_history.append({"role": "user", "content": item[0]})
# if item[1] is not None:
# chat_history.append({"role": "assistant", "content": item[1]})
# message = f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
# chat_history.append({"role": "user", "content": message})
# messages = peft_tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
# # Tokenize the messages string
# model_inputs = peft_tokenizer([messages], return_tensors="pt").to(DEVICE)
# streamer = transformers.TextIteratorStreamer(
# peft_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
# )
# generate_kwargs = dict(
# model_inputs,
# streamer=streamer,
# max_new_tokens=1024,
# do_sample=True,
# top_p=0.95,
# top_k=1000,
# temperature=0.75,
# num_beams=1,
# )
# t = Thread(target=peft_model.generate, kwargs=generate_kwargs)
# t.start()
# # Initialize an empty string to store the generated text
# partial_text = ""
# for new_text in streamer:
# # print(new_text)
# partial_text += new_text
# # Yield the text generated so far so the chat UI can stream the reply
# yield partial_text
# chat = gr.ChatInterface(fn=chat, title="Mental Health Chatbot - by Jayda Hunte")
# chat.launch(share=True)
import os
from openai import OpenAI
from dotenv import load_dotenv
import gradio as gr
# Load settings from a .env file (if one is present) before the key is read.
load_dotenv()

# API_KEY is None when OPENAI_API_KEY is not set in the environment.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Module-wide client; predict() sends every chat request through it.
openai = OpenAI(api_key=API_KEY)
def create_msg(x, y):
    """Build one chat message dict in the role/content format.

    Args:
        x: The message role, e.g. ``"system"``, ``"user"`` or ``"assistant"``.
        y: The message content.

    Returns:
        ``{"role": x, "content": y}``.
    """
    return {"role": x, "content": y}
# System message placed at the start of every conversation sent to the model.
# The text is split across adjacent literals only to keep the lines readable;
# the resulting string is one single line.
SYSTEM_PROMPT = create_msg(
    "system",
    (
        "You are a helpful mental health chatbot, please answer with care. "
        "If you don't know the answer, respond "
        "'Sorry, I don't know the answer to this question.'. "
        "If the question is too complex, respond "
        "'Kindly, consult a psychiatrist for further queries.'."
    ),
)
def predict(message, history):
    """Stream the assistant's reply to ``message`` for the chat UI.

    Builds a chat payload from the system prompt, the earlier turns in
    ``history`` and the new user message, requests a streamed completion,
    and yields the reply text accumulated so far after every chunk that
    carries content.

    Args:
        message: The text the user just submitted.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            A slot that is ``None`` is left out of the payload.

    Yields:
        The partial reply: all content received so far, growing with each
        content-bearing chunk.
    """
    history_openai_format = [SYSTEM_PROMPT]
    for human, assistant in history:
        # Skip empty slots instead of sending a message with null content.
        if human is not None:
            history_openai_format.append({"role": "user", "content": human})
        if assistant is not None:
            history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.chat.completions.create(
        model="ft:gpt-3.5-turbo-0613:personal::8kBTG8eh",
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # A streamed chunk may arrive with an empty ``choices`` list; there
        # is no delta to read from it, so move on to the next chunk.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message
# Build the chat UI around predict() and start serving it. share=True asks
# Gradio for a shareable link in addition to the local server.
gr.ChatInterface(
    fn=predict,
    title="Mental Health Chatbot - by Jayda Hunte",
).launch(share=True)
|