Update app.py
app.py CHANGED
@@ -6,17 +6,17 @@ import torch
 import gradio as gr
 import sentencepiece
 
-title = "# ²Welcome to 🙋🏻♂️Tonic's
-description = """[
+title = "# ²Welcome to 🙋🏻♂️Tonic's🧠🤌🏻Neural Chat (From Intel)!"
+description = """Try out [Intel/neural-chat-7b-v3-1](https://huggingface.co/Intel/neural-chat-7b-v3-1), the instruct finetune built with the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [Intel/neural-chat-7b-v3-1](https://huggingface.co/Intel/neural-chat-7b-v3-1) here via API using Gradio by scrolling down and clicking 'Use via API', or privately by [cloning this space on huggingface](https://huggingface.co/spaces/TeamTonic/NeuralChat?duplicate=true). [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!"""
 
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_name = "
-tokenizer = AutoTokenizer.from_pretrained("
+model_name = "Intel/neural-chat-7b-v3-1"
+tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1")
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 
-class TuluChatBot:
-    def __init__(self, model, tokenizer, system_message="You are
+class IntelChatBot:
+    def __init__(self, model, tokenizer, system_message="You are 🧠🤌🏻Neuro, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
         self.model = model
         self.tokenizer = tokenizer
         self.system_message = system_message
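The hunk starts at line 6 and its header shows `import torch` as the context immediately above, so the file's opening imports are elided. Given the names used in the hunk (os, AutoTokenizer, AutoModelForCausalLM), lines 1-5 presumably look like the following sketch; these lines are inferred from usage, not copied from the commit:

# Presumed contents of the elided lines 1-5, inferred from the names used below.
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

Loading with torch_dtype=torch.bfloat16 halves the weight memory relative to float32, and device_map="auto" asks accelerate to place the layers across whatever devices are available.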
@@ -25,7 +25,7 @@ class TuluChatBot:
         self.system_message = new_system_message
 
     def format_prompt(self, user_message):
-        prompt = f"
+        prompt = f"### System:\n {self.system_message}\n ### User:\n{user_message}\n### Assistant:\n"
         return prompt
 
     def predict(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
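To make the new template concrete, here is what format_prompt returns for a short input; a sketch using the class as defined above, with a stand-in system message:

# Hypothetical usage; the system message here is an illustration, not the app's default.
bot = IntelChatBot(model, tokenizer, system_message="You are a helpful assistant.")
print(bot.format_prompt("What is bfloat16?"))
# ### System:
#  You are a helpful assistant.
#  ### User:
# What is bfloat16?
# ### Assistant:

This is the ### System / ### User / ### Assistant layout the Intel model card describes, with the model's reply generated after the final "### Assistant:" marker.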
@@ -49,23 +49,23 @@ class TuluChatBot:
         return response
 
 def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
-
+    Intel_bot.set_system_message(system_message)
     if not do_sample:
         max_length = 780
         temperature = 1.2
         top_p = 0.9
         repetition_penalty = 0.9
-    response =
+    response = Intel_bot.predict(user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample)
     return response
 
-
+Intel_bot = IntelChatBot(model, tokenizer)
 
 
 with gr.Blocks(theme = "ParityError/Anime") as demo:
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Row():
-        system_message = gr.Textbox(label="Optional
+        system_message = gr.Textbox(label="Optional 🧠🤌🏻NeuralChat Assistant Message", lines=2)
         user_message = gr.Textbox(label="Your Message", lines=3)
     with gr.Row():
         do_sample = gr.Checkbox(label="Advanced", value=False)
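The body of predict (lines 31-48) is untouched by this commit and elided from the diff. The following is a minimal sketch of how such a method is typically written with transformers, assuming it tokenizes the formatted prompt, generates, and decodes only the new tokens; it is an assumption, not the file's actual code:

    # Assumed shape of the elided predict method; the real implementation may differ.
    def predict(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
        prompt = self.format_prompt(user_message)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=do_sample,
        )
        # Return only the tokens generated after the prompt.
        return self.tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)

Two details in the visible gradio_predict are worth flagging: max_length = 780 is assigned on the non-sampling path but never used, and repetition_penalty = 0.9 sits below the 1.0 minimum the slider later enforces.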
@@ -78,7 +78,7 @@ with gr.Blocks(theme = "ParityError/Anime") as demo:
         repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
 
     submit_button = gr.Button("Submit")
-    output_text = gr.Textbox(label="
+    output_text = gr.Textbox(label="🧠🤌🏻NeuralChat Response")
 
     def process(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
         return gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample)