Milo-LLM-Test

Runtime error

App Files Files Community

Milo-LLM-Test / app.py

vericudebuget

Update app.py

4fc89d8 verified about 1 year ago

raw

history blame

2.95 kB

	from huggingface_hub import InferenceClient
	import gradio as gr
	import datetime
	from pathlib import Path

	# Shared inference client bound to the Mixtral-8x7B instruct model;
	# generate() sends every text-generation request through it.
	client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

	def format_prompt(message, history):
	    """Build the instruct-style prompt string for one chat request.

	    Each ``(user, bot)`` pair in ``history`` is rendered as
	    ``[INST] user [/INST] bot</s> `` after a leading ``<s>``, and the new
	    ``message`` is appended as a final, unanswered ``[INST]`` block.
	    """
	    past_turns = [
	        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
	        for user_prompt, bot_response in history
	    ]
	    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

	def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
	    """Stream Milo's reply to ``prompt``, yielding the text accumulated so far.

	    Args:
	        prompt: The latest user message.
	        history: Earlier ``(user, bot)`` turns, forwarded to ``format_prompt``.
	        system_prompt: Optional extra instructions (the UI "System Prompt"
	            box). When non-blank they are appended to the built-in Milo
	            persona; when blank or ``None`` only the persona is used.
	        temperature: Sampling temperature; clamped to at least 0.01.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        top_p: Nucleus-sampling probability mass.
	        repetition_penalty: Repetition penalty passed to the backend.

	    Yields:
	        The reply text accumulated so far, once per non-special streamed token.
	    """
	    # UI sliders may deliver strings/floats; coerce everything up front.
	    generate_kwargs = dict(
	        temperature=max(float(temperature), 1e-2),
	        max_new_tokens=int(max_new_tokens),
	        top_p=float(top_p),
	        repetition_penalty=float(repetition_penalty),
	        do_sample=True,
	        # NOTE(review): a fixed seed presumably makes "Retry" reproduce the
	        # same reply for identical inputs -- confirm this is intended.
	        seed=42,
	    )
	    now = datetime.datetime.now()
	    formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
	    persona = f"System time: {formatted_time}. Instructions: You are an AI named Milo made by ConvoLite. You use the he/him pronouns(boy) You we`re made in 2024. You act very friendly, responding to the user with the same vibe you get taked to. You never start the message with ”as an AI assistant” or ”as an AI made by ConvoLite” or something like that. You don`t talk a lot. You only respond to what the user asks."
	    # Previously the caller-supplied system prompt was overwritten and lost;
	    # keep the persona but honour any extra instructions the user typed.
	    extra_instructions = (system_prompt or "").strip()
	    if extra_instructions:
	        persona = f"{persona} {extra_instructions}"
	    formatted_prompt = format_prompt(f"{persona}, {prompt}", history)
	    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
	    output = ""
	    for response in stream:
	        # Skip special tokens (e.g. the end-of-sequence marker) so they do
	        # not leak into the visible reply.
	        if getattr(response.token, "special", False):
	            continue
	        output += response.token.text
	        yield output

	# Extra controls shown with the chat box. Their values are passed to
	# generate() after (message, history), in this order: system prompt,
	# temperature, max new tokens, top-p, repetition penalty.
	additional_inputs = [
	    gr.Textbox(
	        label="System Prompt",
	        max_lines=1,
	        interactive=True,
	    ),
	    gr.Slider(
	        label="Temperature",
	        value=0.9,
	        minimum=0.0,
	        maximum=1.0,
	        step=0.05,
	        interactive=True,
	        info="Higher values produce more diverse outputs",
	    ),
	    gr.Slider(
	        label="Max new tokens",
	        value=9048,
	        minimum=256,
	        maximum=9048,
	        step=64,
	        interactive=True,
	        info="The maximum numbers of new tokens",
	    ),
	    gr.Slider(
	        label="Top-p (nucleus sampling)",
	        value=0.90,
	        minimum=0.0,
	        maximum=1,
	        step=0.05,
	        interactive=True,
	        info="Higher values sample more low-probability tokens",
	    ),
	    gr.Slider(
	        label="Repetition penalty",
	        value=1.2,
	        minimum=1.0,
	        maximum=2.0,
	        step=0.05,
	        interactive=True,
	        info="Penalize repeated tokens",
	    ),
	]

	# (user avatar, bot avatar) image URLs shown next to chat messages.
	avatar_images = ("https://i.postimg.cc/pXjKKVXG/user-circle.png", "https://i.postimg.cc/qq04Yz93/CL3.png")

	# Chat display widget: panel layout with copy/like controls and no share button.
	chatbot = gr.Chatbot(
	    show_label=True,
	    show_share_button=False,
	    show_copy_button=True,
	    likeable=True,
	    layout="panel",
	    height="auto",
	    avatar_images=avatar_images,
	)

	# Wire generate() and the extra controls into the chat UI.
	demo = gr.ChatInterface(
	    fn=generate,
	    chatbot=chatbot,
	    additional_inputs=additional_inputs,
	    title="ConvoLite",
	    submit_btn="➢",
	    retry_btn="Retry",
	    undo_btn="↩ Undo",
	    clear_btn="Clear (New chat)",
	    stop_btn="Stop ▢",
	    concurrency_limit=20,
	)

	# Start the app with the API docs page hidden.
	demo.launch(show_api=False)