neo_doublequant / app.py
Tonic's picture
Update app.py
1fba1cf verified
raw
history blame
4.51 kB
import gradio as gr
import torch
import transformers
import bitsandbytes
import accelerate
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import os
# Markdown shown at the top of the UI (passed to gr.Markdown in gradio_app).
title = """# Welcome to 🌟Tonic's🐇🥷🏻Neo
WhiteRabbit🐇🥷🏻Neo is a model series that can be used for offensive and defensive cybersecurity. You can build with this endpoint using🐇🥷🏻Neo available here : [WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5](https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5). You can also use 🐇🥷🏻Neo by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/neo?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# System prompt used by generate_text when the user leaves the custom prompt empty.
default_system_prompt = """SYSTEM: You are an AI that code. Answer with code."""
# Hub repository that both the weights and the tokenizer are loaded from.
model_path = "WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5"

# Fail fast at start-up when no access token is configured. The value is only
# validated here; it is not passed explicitly to the loaders below.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

# 4-bit loading with double quantization enabled and bfloat16 compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Model first, then tokenizer; both allow custom code from the repository.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=quantization_config,
    trust_remote_code=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
def generate_text(custom_prompt, user_input, temperature, generate_len, top_p, top_k):
    """Generate a reply to ``user_input`` with the module-level model.

    The prompt is the system prompt, then a ``USER:`` line holding the
    instruction, then an ``ASSISTANT:`` cue for the model to continue.

    Args:
        custom_prompt: Optional system prompt. When empty, the module-level
            ``default_system_prompt`` is used instead.
        user_input: The user's instruction.
        temperature: Sampling temperature.
        generate_len: Maximum number of new tokens to generate.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.

    Returns:
        The decoded text of the newly generated tokens only, stripped of
        surrounding whitespace.
    """
    system_prompt = custom_prompt if custom_prompt else default_system_prompt
    llm_prompt = f"{system_prompt} \nUSER: {user_input} \nASSISTANT: "

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() would otherwise have to infer.
    encoded = tokenizer(llm_prompt, return_tensors="pt")
    # The model is loaded with device_map="auto", so follow the model's own
    # device instead of hard-coding "cuda".
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)
    prompt_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            # Same budget as max_length = prompt length + generate_len.
            # UI sliders may deliver floats, hence the int() casts.
            max_new_tokens=int(generate_len),
            # Without do_sample=True decoding is greedy and the
            # temperature / top_p / top_k settings are ignored.
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
            num_return_sequences=1,
        )

    # Drop the prompt at token level: slicing the decoded string by
    # len(llm_prompt) is wrong whenever decoding does not reproduce the
    # prompt text exactly.
    new_tokens = output[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
def gradio_app():
    """Assemble the Blocks interface around ``generate_text`` and launch it."""
    # NOTE(review): widget nesting was reconstructed from a source whose
    # indentation was lost; confirm the output panel's placement.
    with gr.Blocks() as demo:
        gr.Markdown(title)

        # Row 1: prompt inputs.
        with gr.Row():
            system_box = gr.Textbox(
                label="🐇🥷🏻NeoCustom System Prompt (optional)",
                placeholder="Leave blank to use the default prompt...",
            )
            instruction_box = gr.Textbox(
                label="Your Instruction",
                placeholder="Type your question here...",
            )

        # Row 2: generation settings.
        with gr.Row():
            temperature_slider = gr.Slider(
                minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"
            )
            length_slider = gr.Slider(
                minimum=100, maximum=1024, step=10, value=100, label="Generate Length"
            )
            top_p_slider = gr.Slider(
                minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Top P"
            )
            top_k_slider = gr.Slider(
                minimum=0, maximum=100, step=1, value=50, label="Top K"
            )

        # Row 3: trigger button, followed by the result panel.
        with gr.Row():
            run_button = gr.Button("Generate")
        result_box = gr.Code(label="🐇🥷🏻Neo:", lines=10)

        # Input order must match generate_text's positional parameters.
        click_inputs = [
            system_box,
            instruction_box,
            temperature_slider,
            length_slider,
            top_p_slider,
            top_k_slider,
        ]
        run_button.click(fn=generate_text, inputs=click_inputs, outputs=result_box)

    demo.launch()
# Build and launch the Gradio UI only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app()