Spaces:

wop
/

Kosmox

Paused

App Files Community

Kosmox / app.py

wop

Update app.py

c18814e verified about 1 year ago

raw

history blame

2.33 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM
	import torch

	# Load the causal language model once at import time; respond() below reads
	# this module-level `model` for every chat request.
	# NOTE(review): the repo name suggests GGUF weights - confirm that
	# from_pretrained can load it without extra arguments (e.g. a gguf_file).
	model_name = "wop/kosmox-gguf"
	model = AutoModelForCausalLM.from_pretrained(model_name)

	# Define the chat template function
	def format_chat(messages, add_generation_prompt):
	    """Render a list of chat messages into a single prompt string.

	    Args:
	        messages: Iterable of dicts with a ``'from'`` key (the speaker role)
	            and a ``'value'`` key (the message text).
	        add_generation_prompt: When truthy, a single trailing space is
	            appended after the last message.

	    Returns:
	        The prompt: ``"<BOS>"`` followed by every message in order.
	        ``'human'`` and ``'gpt'`` turns are emitted as the bare text padded
	        with one space on each side; any other role is emitted as
	        ``<|role|> text `` so it stays distinguishable.
	    """
	    parts = ["<BOS>"]
	    for message in messages:
	        role = message['from']
	        if role in ('human', 'gpt'):
	            # Both conversational roles share the same untagged layout.
	            parts.append(' ' + message['value'] + ' ')
	        else:
	            # The tag delimiters are plain '<|' and '|>'. A backslash before
	            # the pipe is not a valid escape and would leak into the prompt.
	            parts.append('<|' + role + '|> ' + message['value'] + ' ')
	    if add_generation_prompt:
	        parts.append(' ')
	    # Join once instead of growing the string on every iteration.
	    return ''.join(parts)

	# Function to generate responses
	def respond(message, history, system_message, max_tokens, temperature, top_p):
	    """Generate the model's reply to ``message`` given the prior conversation.

	    Args:
	        message: The new user message.
	        history: Iterable of ``(user_msg, bot_msg)`` pairs from earlier
	            turns; falsy entries in a pair are skipped.
	        system_message: Text placed first in the prompt under the
	            ``system`` role.
	        max_tokens: Upper bound on the number of newly generated tokens.
	        temperature: Sampling temperature forwarded to ``model.generate``.
	        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

	    Yields:
	        A single string: the decoded reply with surrounding whitespace
	        stripped.
	    """
	    # Rebuild the conversation in the role/value layout format_chat expects.
	    messages = [{"from": "system", "value": system_message}]
	    for user_msg, bot_msg in history:
	        if user_msg:
	            messages.append({"from": "human", "value": user_msg})
	        if bot_msg:
	            messages.append({"from": "gpt", "value": bot_msg})
	    messages.append({"from": "human", "value": message})

	    chat_input = format_chat(messages, add_generation_prompt=False)

	    # Placeholder tokenization: each character's code point is used as its
	    # token id. NOTE(review): this is not the model's real vocabulary;
	    # replace with the matching tokenizer once one is available.
	    inputs = torch.tensor(
	        [ord(c) for c in chat_input], dtype=torch.long
	    ).unsqueeze(0)
	    prompt_length = inputs.shape[1]

	    with torch.no_grad():
	        outputs = model.generate(
	            input_ids=inputs,
	            # The UI control is "Max new tokens". max_length would count the
	            # prompt as well and leave no budget once the history grows.
	            max_new_tokens=int(max_tokens),
	            temperature=temperature,
	            top_p=top_p,
	            do_sample=True,
	        )

	    # A causal LM returns the prompt ids followed by the continuation, so
	    # drop the prompt or the reply would echo the whole conversation.
	    generated = outputs[0][prompt_length:].tolist()

	    # Placeholder decoding, mirroring the tokenization above: only ids
	    # below 256 are mapped back to characters.
	    response = ''.join(chr(t) for t in generated if t < 256)
	    yield response.strip()

	# Build the chat UI. The extra controls are handed to respond() after the
	# message and history, in the order listed here: system_message,
	# max_tokens, temperature, top_p.
	demo = gr.ChatInterface(
	    fn=respond,
	    additional_inputs=[
	        gr.Textbox(
	            label="System message",
	            value="You are a friendly Chatbot.",
	        ),
	        gr.Slider(
	            label="Max new tokens",
	            minimum=1,
	            maximum=2048,
	            step=1,
	            value=512,
	        ),
	        gr.Slider(
	            label="Temperature",
	            minimum=0.1,
	            maximum=4.0,
	            step=0.1,
	            value=0.7,
	        ),
	        gr.Slider(
	            label="Top-p (nucleus sampling)",
	            minimum=0.1,
	            maximum=1.0,
	            step=0.05,
	            value=0.95,
	        ),
	    ],
	)

	# Start the web app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()