Xgen

Paused

App Files Files Community

Xgen / app.py

Tonic

Update app.py

3792262 over 1 year ago

raw

history blame

5.72 kB

	import transformers
	from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
	import torch
	import gradio as gr
	import json
	import os
	import shutil
	import requests

	# Run on the GPU when torch can see one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Module-level sampling defaults.
	# NOTE(review): nothing in the visible code reads these four values;
	# FalconChatBot.predict declares its own defaults -- confirm before removing.
	temperature = 0.4
	max_new_tokens = 240
	top_p = 0.92
	repetition_penalty = 1.7

	model_name = "OpenLLM-France/Claire-7B-0.1"

	tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
	model = transformers.AutoModelForCausalLM.from_pretrained(
	    model_name,
	    device_map="auto",
	    torch_dtype=torch.bfloat16,
	    # 4-bit loading is a GPU-only optimisation, so request it only when a
	    # CUDA device was detected; on CPU the model loads in plain bfloat16
	    # instead of failing at start-up.
	    load_in_4bit=(device == "cuda"),
	)
	model = model.to_bettertransformer()

	# Chatbot wrapper around the module-level `tokenizer` and `model`.
	class FalconChatBot:
	    """Build a dialogue prompt, sample a continuation and track history.

	    The instance only stores the system prompt; the tokenizer, model and
	    device are read from module scope when `predict` is called.
	    """

	    def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
	        """Store the text that is placed at the top of every prompt."""
	        self.system_prompt = system_prompt

	    def process_history(self, history):
	        """Return a cleaned copy of `history`.

	        Args:
	            history: Expected to be a list of dicts with "user" and
	                "assistant" keys. Anything that is not a list (including
	                None) yields an empty list.

	        Returns:
	            A new list of {"user": ..., "assistant": ...} dicts. Entries
	            that are not dicts are dropped, as are entries whose user text
	            starts with the special command prefix "Protagoniste:". Missing
	            keys default to an empty string.
	        """
	        if not isinstance(history, list):
	            return []

	        filtered_history = []
	        for message in history:
	            if not isinstance(message, dict):
	                continue
	            user_message = message.get("user", "")
	            # Only strings can carry the command prefix; other stored values
	            # (e.g. None) are passed through instead of raising on
	            # `.startswith`.
	            if isinstance(user_message, str) and user_message.startswith("Protagoniste:"):
	                continue
	            filtered_history.append(
	                {"user": user_message, "assistant": message.get("assistant", "")}
	            )
	        return filtered_history

	    def predict(self, user_message, assistant_message, history, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
	        """Sample a continuation for the given dialogue turn.

	        Args:
	            user_message: Text placed last in the prompt.
	            assistant_message: Optional text placed between the system
	                prompt and `user_message`; falsy values become "".
	            history: Previous turns; cleaned with `process_history`. It is
	                only used to build the returned history and is not part of
	                the prompt sent to the model.
	            temperature: Sampling temperature forwarded to `generate`.
	            max_new_tokens: Upper bound on generated tokens.
	            top_p: Nucleus-sampling threshold forwarded to `generate`.
	            repetition_penalty: Repetition penalty forwarded to `generate`.

	        Returns:
	            A `(response_text, updated_history)` tuple, where
	            `updated_history` is the cleaned history plus one new
	            {"user", "assistant"} entry for this call.
	        """
	        processed_history = self.process_history(history)

	        conversation = f"{self.system_prompt}\n {assistant_message if assistant_message else ''}\n {user_message}\n "
	        input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
	        input_ids = input_ids.to(device)

	        response = model.generate(
	            input_ids=input_ids,
	            use_cache=False,
	            early_stopping=False,
	            bos_token_id=model.config.bos_token_id,
	            eos_token_id=model.config.eos_token_id,
	            pad_token_id=model.config.eos_token_id,
	            temperature=temperature,
	            do_sample=True,
	            # UI widgets may deliver the token budget as a float; the
	            # generation length must be an integer.
	            max_new_tokens=int(max_new_tokens),
	            top_p=top_p,
	            repetition_penalty=repetition_penalty,
	        )

	        # NOTE(review): the whole returned sequence is decoded; for causal
	        # models generate() normally returns prompt + continuation, so the
	        # text likely echoes the prompt -- confirm this is intended.
	        response_text = tokenizer.decode(response[0], skip_special_tokens=True)

	        updated_history = processed_history + [{"user": user_message, "assistant": response_text}]
	        return response_text, updated_history


	# Create the chatbot instance whose predict() backs the web UI.
	falcon_bot = FalconChatBot()

	# Texts shown at the top of the Gradio page.
	title = "👋🏻Bienvenue à Tonic's 🌜🌚Claire Chat !"
	description = "Vous pouvez utiliser [🌜🌚ClaireGPT](https://huggingface.co/OpenLLM-France/Claire-7B-0.1) Ou dupliquer pour l'uiliser localement ou sur huggingface! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."

	# Initial conversation history; it seeds the per-session state below.
	history = [
	    {
	        "user": "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:",
	        "assistant": "Emmanuel Macron: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui.",
	    },
	]

	# One example row per visible input, in the order the inputs are declared.
	# The history is carried by a hidden state component, so it has no column.
	examples = [
	    [
	        "[Elon Musk:] - Bonjour Emmanuel. Enchanté de vous revoir.",  # user_message
	        "[Emmanuel Macron:] - Je vois que vous avez effectué un voyage dans la région de la Gascogne.",  # assistant_message
	        0.4,  # temperature
	        200,  # max_new_tokens
	        0.90,  # top_p
	        1.9,  # repetition_penalty
	    ]
	]

	# Secondary text box followed by the four sampling controls, in the order
	# predict() expects them: temperature, max_new_tokens, top_p,
	# repetition_penalty.
	additional_inputs = [
	    gr.Textbox("", label="Introduisez Un Autre Personnage Ici ou Mettez En Scene"),
	    gr.Slider(
	        label="Temperature",
	        value=0.7,  # Default value
	        minimum=0.05,
	        maximum=1.0,
	        step=0.05,
	        interactive=True,
	        info="Higher values produce more diverse outputs",
	    ),
	    gr.Slider(
	        label="Max new tokens",
	        value=100,  # Default value
	        minimum=25,
	        maximum=256,
	        step=1,
	        interactive=True,
	        info="The maximum numbers of new tokens",
	    ),
	    gr.Slider(
	        label="Top-p (nucleus sampling)",
	        value=0.90,
	        minimum=0.01,
	        maximum=0.99,
	        step=0.05,
	        interactive=True,
	        info="Higher values sample more low-probability tokens",
	    ),
	    gr.Slider(
	        label="Repetition penalty",
	        value=1.9,
	        minimum=1.0,
	        maximum=2.0,
	        step=0.05,
	        interactive=True,
	        info="Penalize repeated tokens",
	    ),
	]

	# predict() takes (user_message, assistant_message, history, temperature,
	# max_new_tokens, top_p, repetition_penalty) and returns (text, history).
	# The history therefore has to sit in third position as a state component,
	# otherwise every slider value is shifted onto the wrong parameter; the
	# matching "state" output receives the updated history.
	iface = gr.Interface(
	    fn=falcon_bot.predict,
	    title=title,
	    description=description,
	    examples=examples,
	    inputs=[
	        gr.Textbox(label="Utilisez se format pour initier une conversation [Personage:]", type="text", lines=5),
	        additional_inputs[0],
	        gr.State(value=history),
	        *additional_inputs[1:],
	    ],
	    outputs=["text", "state"],
	    theme="ParityError/Anime",
	)

	# Launch the Gradio interface.
	iface.launch()