|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import transformers |
|
import torch |
|
import gradio as gr |
|
|
|
# Target precision for the model; bfloat16 halves memory vs. float32.
desired_dtype = torch.bfloat16  # NOTE(review): assigned but never read anywhere below

# Process-wide side effect: every tensor created from here on defaults to
# bfloat16, not just the model weights.
torch.set_default_dtype(torch.bfloat16)

# Hugging Face Hub model id, shared by the model and tokenizer loads below.
checkpoint = "tiiuae/falcon-7b-instruct"

# Load the causal-LM weights. device_map="auto" lets accelerate place layers
# across the available devices, spilling what does not fit into the on-disk
# "offload" folder. trust_remote_code is needed because this checkpoint ships
# custom modeling code on the Hub.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, device_map="auto", offload_folder="offload", trust_remote_code=True)

# Tokenizer matching the checkpoint (trust_remote_code for the same reason).
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

# Text-generation pipeline wrapping the model + tokenizer; used by
# generate_seqs(). dtype/device settings mirror the load above.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
|
|
|
def format_chat_prompt(message, chat_history, instruction):
    """Serialize the system instruction, prior turns, and the new user
    message into a single newline-delimited prompt string.

    Each history entry is a (user_message, bot_message) pair; the prompt
    ends with a dangling "Assistant:" for the model to complete.
    """
    segments = [f"System:{instruction}"]
    for user_text, bot_text in chat_history:
        segments.append(f"User: {user_text}\nAssistant: {bot_text}")
    segments.append(f"User: {message}\nAssistant:")
    return "\n".join(segments)
|
|
|
def generate_seqs(prompt, max_new_tokens=None, stop_sequence=None, temperature=None):
    """Run one sampled generation for `prompt` through the module-level pipeline.

    Args:
        prompt: full prompt string (system + history + "Assistant:").
        max_new_tokens: cap on generated tokens; when None, fall back to a
            total max_length of 200 (the original behavior).
        stop_sequence: a single string (forwarded to the pipeline) or a
            list/tuple of strings (trimmed from the output post-hoc).
        temperature: sampling temperature; pipeline default when None.

    Returns:
        The generated text, prompt included, cut at the first stop marker.
    """
    gen_kwargs = {
        "truncation": True,
        "do_sample": True,
        "top_k": 10,
        "num_return_sequences": 1,
        "eos_token_id": tokenizer.eos_token_id,
    }
    # Bug fix: the original passed max_length=200 AND max_new_tokens together;
    # transformers treats that as a conflict (max_new_tokens silently wins,
    # making max_length dead). Pass exactly one of them.
    if max_new_tokens is not None:
        gen_kwargs["max_new_tokens"] = max_new_tokens
    else:
        gen_kwargs["max_length"] = 200
    # Only forward temperature when the caller actually set one.
    if temperature is not None:
        gen_kwargs["temperature"] = temperature
    # Bug fix: the pipeline's stop_sequence must be a single string it can
    # tokenize, but respond() passes a list. Forward strings unchanged and
    # handle list/tuple stop markers by trimming the decoded output instead.
    trailing_stops = []
    if isinstance(stop_sequence, str):
        gen_kwargs["stop_sequence"] = stop_sequence
    elif stop_sequence is not None:
        trailing_stops = [s for s in stop_sequence if s]

    output = pipeline(prompt, **gen_kwargs)
    text = output[0]['generated_text']
    # Cut at the first stop marker occurring after the prompt. Searching from
    # len(prompt) assumes the decoded output begins with the prompt verbatim
    # (it may drift slightly if truncation fired — acceptable best-effort).
    for stop in trailing_stops:
        cut = text.find(stop, len(prompt))
        if cut != -1:
            text = text[:cut]
    return text
|
|
|
def respond(message, chat_history, instruction, temperature=0.7):
    """Gradio callback: generate a reply and yield the chat history one
    character at a time to simulate streaming.

    Yields ("", chat_history) pairs — the empty string clears the textbox,
    and chat_history grows by one character of the reply per yield.
    """
    full_prompt = format_chat_prompt(message, chat_history, instruction)
    # Open a fresh turn with an empty assistant slot to fill in below.
    chat_history = chat_history + [[message, ""]]
    # Generation is not actually streamed: the whole completion is produced
    # up front, then the text after the final "Assistant: " is drip-fed.
    completion = generate_seqs(prompt=full_prompt,
                               max_new_tokens=8192,
                               stop_sequence=["\nUser:", "<|endoftext|>"],
                               temperature=temperature).split('Assistant: ')[-1]

    for position, char in enumerate(completion):
        # Drop a single leading space so the bubble doesn't start indented.
        if position == 0 and char.startswith(" "):
            char = char[1:]
        current_turn = list(chat_history.pop(-1))
        current_turn[-1] += char
        chat_history = chat_history + [current_turn]
        yield "", chat_history
|
|
|
# --- UI layout: chat window, prompt box, advanced options, and buttons. ---
with gr.Blocks() as demo:

    # Static header shown above the chat widget.
    gr.Markdown(

    """

    # General purpose chatbot - test & demo app by Srinivas.V..

    ## As this is a free hosted platform (Computing and Memory limitations), you will find it slow and the app may not provide appropriate answers after a few dialogues. Type in your prompt, click/ submit and wait for the resonse before typing in your next prompt.

    """)

    # Conversation display; respond() yields updated histories into it.
    chatbot = gr.Chatbot(height=1000)

    # User input box; cleared (set to "") on every yield from respond().
    msg = gr.Textbox(label="Prompt")

    # Collapsed-by-default controls for the system prompt and sampling temperature.
    with gr.Accordion(label="Advanced options",open=False):

        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")

        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)

    btn = gr.Button("Submit")

    # Resets all four components to their initial values.
    clear = gr.ClearButton(components=[msg, chatbot, system, temperature], value="Clear console")

    # Both the button and pressing Enter in the textbox trigger generation;
    # respond() is a generator, so the chat updates incrementally.
    btn.click(respond, inputs=[msg, chatbot, system, temperature], outputs=[msg, chatbot])

    msg.submit(respond, inputs=[msg, chatbot, system, temperature], outputs=[msg, chatbot])

# Close any leftover Gradio servers from earlier runs, then serve the app.
# Queueing is required for generator callbacks like respond().
gr.close_all()

demo.queue().launch()