Spaces:
Runtime error
Runtime error
# load model once | |
import torch | |
from peft import AutoPeftModelForCausalLM | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import gradio as gr | |
import random | |
import time | |
# Hugging Face Hub repository that both the tokenizer and the PEFT model are
# loaded from.
model_id = "hikinegi/Llama-JAVA_tuned"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Loaded once at import time so every request reuses the same weights.
# Half precision is requested explicitly; device placement is delegated to
# `device_map="auto"`.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# NOTE: the explicit switch to evaluation mode is left disabled, as before.
# model.eval()
def generate_pred(text):
    """Generate the model's answer to a single user instruction.

    The instruction is wrapped in the ``[INST]`` / ``<<SYS>>`` prompt template,
    passed through the module-level ``tokenizer`` and ``model``, and only the
    newly generated continuation is returned.

    Args:
        text: The user's question or instruction.

    Returns:
        The decoded completion as a string, without the echoed prompt and
        without special tokens such as ``<s>`` / ``</s>``.
    """
    prompt = f"<s>[INST]<<SYS>>\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n<</SYS>>\n{text}[/INST]"

    # Move the encoded prompt to wherever the model lives. With
    # device_map="auto" the model may be on a GPU, and leaving the inputs on
    # the CPU would mix devices inside generate().
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=1024,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation for causal LMs; drop the prompt
    # tokens so the caller does not see the system prompt and question echoed.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]

    # Special tokens are skipped because markers like "<s>" would otherwise be
    # shown verbatim (or interpreted as HTML) by the chat UI.
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# Chat UI: a question textbox feeds a chatbot panel; the answer is produced by
# generate_pred() and revealed one character at a time.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown(
        """<h1><center>CodeGuru will answer all of your Java coding questions</center></h1> """
    )
    chatbot = gr.Chatbot(label="CodeGuru")
    msg = gr.Textbox(label="Question")
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        """Append the submitted question to the chat and clear the textbox.

        Args:
            user_message: Text the user typed into the question box.
            history: Current chat history as a list of [user, bot] pairs
                (may be None/empty before the first message).

        Returns:
            A tuple of (new textbox value, new history); the new history ends
            with ``[user_message, None]`` so ``bot`` can fill in the answer.
        """
        return "", (history or []) + [[user_message, None]]

    def bot(history):
        """Answer the latest question and stream the reply into the chat.

        Args:
            history: Chat history whose last entry is ``[question, None]``.

        Yields:
            The history after each additional character of the reply has been
            appended to the last entry, producing a typing effect.
        """
        bot_message = generate_pred(history[-1][0])
        history[-1][1] = ""
        if not bot_message:
            # Nothing to stream: still emit one update so the chat shows an
            # (empty) reply instead of the generator yielding nothing.
            yield history
            return
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)  # small delay so the text appears to be typed
            yield history

    # First record the question immediately (unqueued), then run the slower
    # generation step that streams into the chatbot.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Feedback buttons; no click handlers are attached to them here.
    with gr.Row(visible=True) as button_row:
        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)

# Queuing is enabled before launch because `bot` is a generator.
demo.queue()
demo.launch(debug=True)