# Hugging Face Space: JerniganLab/qa-only
# (Page-scrape residue condensed: Space status "Sleeping", file size 1,788 bytes,
# blame-gutter commits 2bc20d9 and b0485d6.)

import gradio as gr
import transformers
import torch
from peft import PeftModel
import os

# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

model_id = "JerniganLab/qa-only"  # adapter repository loaded through PeftModel below
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"  # base checkpoint and tokenizer source

# Load the weights directly in bfloat16. The dtype has to be requested here:
# `model_kwargs` given to `transformers.pipeline` is only applied when the
# pipeline loads the model itself, so it has no effect on a model that is
# already instantiated (which would otherwise stay in the default float32).
llama_model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN,
)


pipeline = transformers.pipeline(
    "text-generation",
    model=llama_model,
    tokenizer=base_model,
    token=HF_TOKEN,
    # Prefer the GPU, but do not crash at import time on a CPU-only host.
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# Wrap the base model with the fine-tuned adapter and swap the wrapped model
# into the pipeline so that generation goes through the adapter.
pipeline.model = PeftModel.from_pretrained(llama_model, model_id)

def chat_function(message, history, system_prompt, max_new_tokens, temperature):
    """Generate one assistant reply for the chat UI.

    Args:
        message: Latest user message.
        history: Earlier turns of the conversation. Accepted so the function
            matches the signature the chat interface calls, but not used:
            every reply is generated from ``system_prompt`` and ``message``
            alone.
        system_prompt: Text placed in the system turn of the prompt.
        max_new_tokens: Upper bound on the number of generated tokens. May
            arrive as a float from a slider, so it is converted to ``int``.
        temperature: Sampling temperature. A value at or below zero selects
            greedy decoding; a positive value is used unchanged for sampling.

    Returns:
        The generated text, without the prompt.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "eos_token_id": terminators,
        # Let the pipeline drop the prompt instead of slicing it off by hand.
        "return_full_text": False,
    }
    if temperature > 0:
        # Sample with exactly the temperature the caller asked for.
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=0.9,
        )
    else:
        # Sampling requires a strictly positive temperature; treat zero as a
        # request for deterministic (greedy) decoding rather than shifting it.
        generation_kwargs["do_sample"] = False

    outputs = pipeline(prompt, **generation_kwargs)
    return outputs[0]["generated_text"]

# For information on how to customize the ChatInterface, see the Gradio docs:
# https://www.gradio.app/docs/chatinterface

# Main message box and conversation view.
message_box = gr.Textbox(placeholder="Enter message here", container=False, scale=7)
chat_window = gr.Chatbot(height=400)

# Extra controls, listed in the same order as the parameters that follow
# (message, history) in chat_function's signature.
extra_controls = [
    gr.Textbox(value="You are helpful AI", label="System Prompt"),
    gr.Slider(minimum=500, maximum=4000, label="Max New Tokens"),
    gr.Slider(minimum=0, maximum=1, label="Temperature"),
]

demo = gr.ChatInterface(
    fn=chat_function,
    textbox=message_box,
    chatbot=chat_window,
    additional_inputs=extra_controls,
)


if __name__ == "__main__":
    demo.launch()