Spaces:
Sleeping
Sleeping
File size: 5,249 Bytes
90af0e7 834b899 8a84eb2 90af0e7 ba33077 90af0e7 c42ac02 ba33077 8a84eb2 90af0e7 c42ac02 90af0e7 c42ac02 90af0e7 c88df11 90af0e7 c88df11 90af0e7 b154f3d 90af0e7 ba33077 90af0e7 8a84eb2 90af0e7 b154f3d 90af0e7 ba33077 b154f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import json
import os
import random
from threading import Thread
import gradio as gr
import spaces
import torch
from langchain.schema import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, SecretStr
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
StoppingCriteria,
StoppingCriteriaList,
TextIteratorStreamer,
)
# Hugging Face Hub id shared by the tokenizer and the model weights.
_CHECKPOINT = "ContextualAI/archangel_sft-kto_llama13b"

# Both objects are created once at import time and used as module-level
# globals by the generation code in this file.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(
    _CHECKPOINT,
    device_map="auto",
    load_in_4bit=True,
)
class OAAPIKey(BaseModel):
    """Pydantic model that carries the OpenAI API key as a ``SecretStr``."""

    # Typed as SecretStr, so the raw value is read with ``get_secret_value()``.
    openai_api_key: SecretStr
def set_openai_api_key(api_key: SecretStr):
    """Build the OpenAI chat model for the given API key.

    The key is handed directly to the ``ChatOpenAI`` constructor instead of
    being written to ``os.environ["OPENAI_API_KEY"]``. The environment is
    shared by the whole process, so when several users submit keys to the
    same running app, a key stored there could be picked up by another
    user's request and would stay in the process after the request ended.

    Args:
        api_key: The caller's OpenAI API key.

    Returns:
        A ``ChatOpenAI`` instance for ``gpt-3.5-turbo-0125`` with
        temperature 1.0, authenticated with ``api_key``.
    """
    # The plain string is passed so the client can validate/wrap it itself,
    # independent of which pydantic SecretStr class the caller used.
    return ChatOpenAI(
        temperature=1.0,
        model="gpt-3.5-turbo-0125",
        api_key=api_key.get_secret_value(),
    )
class StopOnSequence(StoppingCriteria):
    """Stopping criterion that fires when a sequence ends with a given text.

    The text is tokenized once (without special tokens) and the resulting ids
    are compared against the tail of every sequence in the batch.
    """

    def __init__(self, sequence, tokenizer):
        """Encode ``sequence`` with ``tokenizer`` and remember its ids and length."""
        self.sequence_ids = tokenizer.encode(sequence, add_special_tokens=False)
        self.sequence_len = len(self.sequence_ids)

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        """Return True if any row of ``input_ids`` ends with the stop ids."""
        # Slice the last ``sequence_len`` ids of every row in one go and
        # compare each tail with the encoded stop sequence.
        tails = input_ids[:, -self.sequence_len:].tolist()
        return any(tail == self.sequence_ids for tail in tails)
class _StopOnEvent(StoppingCriteria):
    """Stopping criterion that ends generation once an external flag is set."""

    def __init__(self, event):
        self.event = event

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        return self.event.is_set()


@spaces.GPU(duration=54)
def spaces_model_predict(message: str, history: list[tuple[str, str]]):
    """Generate the local model's reply to ``message`` given the chat history.

    Every turn is rendered as ``<|user|>\\n{user}\\n<|assistant|>\\n{reply}``;
    the new message is appended as a final turn with an empty reply so the
    model continues after the last ``<|assistant|>`` marker. Generation runs
    in a worker thread and its output is read through a text streamer.

    The worker is always cancelled and joined before this function returns,
    so no generation keeps running in the background after the reply has
    been cut at a ``<|user|>`` marker or after the streamer raised.

    Args:
        message: The new user message.
        history: Previous ``(user, assistant)`` turns.

    Returns:
        The generated text. If the text contains ``<|user|>``, only the part
        before the marker is returned, stripped of surrounding whitespace;
        otherwise the text is returned exactly as streamed.
    """
    # Local import: the file only imports ``Thread`` from ``threading``.
    from threading import Event

    turns = history + [[message, ""]]
    prompt = "".join(
        f"<|user|>\n{item[0]}\n<|assistant|>\n{item[1]}" for item in turns
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    # ``cancel`` lets this function stop the worker from the outside, in
    # addition to the token-level stop on the "<|user|>" marker.
    cancel = Event()
    stopping_criteria = StoppingCriteriaList(
        [StopOnSequence("<|user|>", tokenizer), _StopOnEvent(cancel)]
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=1.0,
        num_beams=1,
        stopping_criteria=stopping_criteria,
    )

    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    generated_text = ""
    try:
        for new_text in streamer:
            generated_text += new_text
            # The marker may appear in the decoded text even if the
            # token-level criterion did not match, so check the text as well.
            if "<|user|>" in generated_text:
                generated_text = generated_text.split("<|user|>")[0].strip()
                break
    finally:
        # Ask the worker to stop at its next step and wait for it; the wait
        # is bounded so a stalled generation cannot block the request forever.
        cancel.set()
        worker.join(timeout=10.0)
    return generated_text
def predict(
    message: str,
    chat_history_openai: list[tuple[str, str]],
    chat_history_spaces: list[tuple[str, str]],
    openai_api_key: "SecretStr | str",
):
    """Send ``message`` to both chatbots and extend their histories.

    Args:
        message: The user's new message.
        chat_history_openai: ``(user, assistant)`` turns of the OpenAI chat;
            the new turn is appended to this list.
        chat_history_spaces: ``(user, assistant)`` turns of the local model
            chat; the new turn is appended to this list.
        openai_api_key: The OpenAI API key (the UI supplies it as plain text;
            it is validated into a ``SecretStr`` via ``OAAPIKey``).

    Returns:
        A 3-tuple ``("", chat_history_openai, chat_history_spaces)``; the
        empty string is what the caller writes back to the message input.

    Raises:
        gr.Error: If no API key was provided.
    """
    # Nothing to send (e.g. the input was cleared): leave both conversations
    # untouched instead of calling either model with an empty prompt.
    if message is None or not message.strip():
        return "", chat_history_openai, chat_history_spaces
    # Fail with a readable message rather than a validation/auth traceback.
    if not openai_api_key:
        raise gr.Error("Please enter your OpenAI API key")

    openai_key_model = OAAPIKey(openai_api_key=openai_api_key)
    openai_llm = set_openai_api_key(api_key=openai_key_model.openai_api_key)

    # OpenAI: replay the previous turns, then add the new user message.
    openai_messages = []
    for human, ai in chat_history_openai:
        openai_messages.append(HumanMessage(content=human))
        openai_messages.append(AIMessage(content=ai))
    openai_messages.append(HumanMessage(content=message))
    openai_response = openai_llm.invoke(input=openai_messages)

    # Spaces Model
    spaces_model_response = spaces_model_predict(message, chat_history_spaces)

    chat_history_openai.append((message, openai_response.content))
    chat_history_spaces.append((message, spaces_model_response))
    return "", chat_history_openai, chat_history_spaces
# Prompts offered in the message dropdown. JSON text is UTF-8, so the encoding
# is stated explicitly instead of depending on the platform default.
with open("askbakingtop.json", "r", encoding="utf-8") as file:
    ask_baking_msgs = json.load(file)
# UI: API-key box, message dropdown, the two chat panes side by side, and the
# submit / clear buttons.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened — confirm the row/column layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            openai_api_key = gr.Textbox(
                label="Please enter your OpenAI API key",
                type="password",
                elem_id="lets-chat-openai-api-key",
            )
    with gr.Row():
        # Offer up to three randomly chosen prompts. Capping k keeps
        # random.sample from raising ValueError when the prompt file holds
        # fewer than three entries.
        options = [
            ask["history"]
            for ask in random.sample(
                ask_baking_msgs, k=min(3, len(ask_baking_msgs))
            )
        ]
        msg = gr.Dropdown(
            options,
            label="Please enter your message",
            interactive=True,
            multiselect=False,
            allow_custom_value=True,
        )
    with gr.Row():
        with gr.Column(scale=1):
            chatbot_openai = gr.Chatbot(label="OpenAI Chatbot 🏢")
        with gr.Column(scale=1):
            chatbot_spaces = gr.Chatbot(
                label="Your own fine-tuned preference optimized Chatbot 💪"
            )
    with gr.Row():
        submit_button = gr.Button("Submit")
    with gr.Row():
        clear = gr.ClearButton([msg])

    def respond(
        message: str,
        chat_history_openai: list[tuple[str, str]],
        chat_history_spaces: list[tuple[str, str]],
        openai_api_key: SecretStr,
    ):
        """Click handler: forward the UI values to ``predict`` unchanged.

        Returns whatever ``predict`` returns, which is mapped onto the
        message input and the two chat panes by the ``outputs`` list below.
        """
        return predict(
            message=message,
            chat_history_openai=chat_history_openai,
            chat_history_spaces=chat_history_spaces,
            openai_api_key=openai_api_key,
        )

    submit_button.click(
        fn=respond,
        inputs=[
            msg,
            chatbot_openai,
            chatbot_spaces,
            openai_api_key,
        ],
        outputs=[msg, chatbot_openai, chatbot_spaces],
    )

demo.launch()
|