import gradio as gr
import torch
from transformers import pipeline
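
# Load the model as a Transformers text-generation pipeline, on the first GPU
# when CUDA is available and on CPU otherwise.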
generator = pipeline(
    "text-generation",
    model="heegyu/gorani-v0",
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)
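

# Build a <usr>/<bot> transcript from the recent chat history plus the new
# message, generate a continuation, and return only the model's reply.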
def query(message, chat_history, max_turn=2):
    prompt = []

    # Keep only the most recent turns so the serialized prompt stays within
    # the model's context window.
    if len(chat_history) > max_turn:
        chat_history = chat_history[-max_turn:]

    # Replay each stored (user, bot) turn using the <usr>/<bot> markers the
    # model expects, then end with an open <bot> tag for it to complete.
    for user, bot in chat_history:
        prompt.append(f"<usr> {user}")
        prompt.append(f"<bot> {bot}")
    prompt.append(f"<usr> {message}")
    prompt = "\n".join(prompt) + "\n<bot>"
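
    # With one stored turn, the prompt has this shape (illustrative values):
    #   <usr> earlier user message
    #   <bot> earlier bot reply
    #   <usr> new user message
    #   <bot>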

    # Sample a continuation; the pipeline output echoes the prompt followed
    # by the newly generated text.
    output = generator(
        prompt,
        do_sample=True,
        top_p=0.9,
        max_new_tokens=256,
    )[0]["generated_text"]

    # Log the full generation for debugging.
    print(output)

    # Strip the echoed prompt so only the new reply is returned.
    response = output[len(prompt):]
    return response.strip()
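

# Minimal Gradio Blocks UI: a chat window, a message box, and a clear button.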
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=700)
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        bot_message = query(message, chat_history)
        chat_history.append((message, bot_message))
        # Clear the textbox and hand the updated history back to the Chatbot.
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()
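
# launch(share=True) can be used instead to expose a temporary public URL
# when the demo needs to be reachable from outside this machine.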