import transformers
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
import json
import os
import shutil
import requests
# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Define the default generation parameters
temperature = 0.4
max_new_tokens = 240
top_p = 0.92
repetition_penalty = 1.7
max_length = 2048
model_name = "OpenLLM-France/Claire-7B-0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,  # 4-bit quantization for efficient inference, if supported by the GPU card
)
model = model.to_bettertransformer()
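# Note (assumptions about the environment): load_in_4bit relies on the bitsandbytes
# package and a CUDA-capable GPU, and to_bettertransformer() relies on the optimum
# package; on a CPU-only machine, dropping both options is a reasonable fallback.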
# Class to encapsulate the Falcon-based chatbot
class FalconChatBot:
    def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
        self.system_prompt = system_prompt

    def process_history(self, history):
        if history is None:
            return []
        # Ensure that history is a list of dictionaries
        if not isinstance(history, list):
            return []
        # Filter out special commands from the history
        filtered_history = []
        for message in history:
            if isinstance(message, dict):
                user_message = message.get("user", "")
                assistant_message = message.get("assistant", "")
                # Keep the turn only if the user message is not a special command
                if not user_message.startswith("Protagoniste:"):
                    filtered_history.append({"user": user_message, "assistant": assistant_message})
        return filtered_history
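    # Illustrative sketch (hypothetical turns): process_history keeps ordinary
    # exchanges and drops any turn whose user message starts with "Protagoniste:":
    #   FalconChatBot().process_history([
    #       {"user": "Protagoniste: mise en scène", "assistant": ""},
    #       {"user": "Bonjour", "assistant": "Salut"},
    #   ])  # -> [{"user": "Bonjour", "assistant": "Salut"}]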
    # Note: history is last in the signature (with a default) so the Gradio inputs
    # below map positionally onto user_message, assistant_message, and the sliders.
    def predict(self, user_message, assistant_message, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9, history=None):
        # Process the history to remove special commands
        processed_history = self.process_history(history)
        # Combine the system prompt, the prior turns, and the current messages into a conversation
        history_text = "".join(f"{turn['user']}\n{turn['assistant']}\n" for turn in processed_history)
        conversation = f"{self.system_prompt}\n{history_text}{assistant_message if assistant_message else ''}\n{user_message}\n"
        # Encode the conversation using the tokenizer
        input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
        input_ids = input_ids.to(device)
        # Generate a response using the Falcon model
        response = model.generate(
            input_ids=input_ids,
            use_cache=False,
            early_stopping=False,
            bos_token_id=model.config.bos_token_id,
            eos_token_id=model.config.eos_token_id,
            pad_token_id=model.config.eos_token_id,
            temperature=temperature,
            do_sample=True,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
        )
        # Decode only the newly generated tokens, skipping the prompt
        response_text = tokenizer.decode(response[0][input_ids.shape[-1]:], skip_special_tokens=True)
        # The Gradio interface has a single text output, so return only the response
        return response_text
# Create the Falcon chatbot instance
falcon_bot = FalconChatBot()
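# Usage sketch (hypothetical messages), independent of the Gradio UI:
#   text = falcon_bot.predict(
#       "[Elon Musk:] - Bonjour Emmanuel.",
#       "[Emmanuel Macron:] - Bonjour Monsieur Musk.",
#       temperature=0.4, max_new_tokens=120,
#   )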
# Define the Gradio interface
title = "👋🏻Bienvenue à Tonic's 🌜🌚Claire Chat !"
description = "Vous pouvez utiliser [🌜🌚ClaireGPT](https://huggingface.co/OpenLLM-France/Claire-7B-0.1) Ou dupliquer pour l'uiliser localement ou sur huggingface! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
history = [
    {"user": "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:", "assistant": "Emmanuel Macron: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui."},
]
# Each example is a flat list matching the interface inputs in order:
# user_message, assistant_message, temperature, max_new_tokens, top_p, repetition_penalty
examples = [
    [
        "[Elon Musk:] - Bonjour Emmanuel. Enchanté de vous revoir.",
        "[Emmanuel Macron:] - Je vois que vous avez effectué un voyage dans la région de la Gascogne.",
        0.4,
        700,
        0.90,
        1.9,
    ]
]
additional_inputs = [
    gr.Textbox("", label="Introduisez un autre personnage ici ou mettez en scène"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=3000,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.01,
        maximum=0.99,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
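# Note: Gradio passes these widgets to fn positionally, after the main textbox, so
# their order must match the predict() signature: assistant_message, temperature,
# max_new_tokens, top_p, repetition_penalty.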
iface = gr.Interface(
    fn=falcon_bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=[
        gr.Textbox(label="Utilisez ce format pour initier une conversation [Personnage:]", type="text", lines=5),
    ] + additional_inputs,
    outputs="text",
    theme="ParityError/Anime",
)
# Launch the Gradio interface for the Falcon model
iface.launch()