Llama3-philosophy-demo

Sleeping

App Files Files Community

Llama3-philosophy-demo / app.py

ruggsea

push

66e8e27 6 months ago

raw

history blame contribute delete

4.78 kB

	import os
	from collections.abc import Iterator
	from threading import Thread

	import gradio as gr
	import spaces
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

	# Bounds for the "Max new tokens" slider: its upper limit and its initial value.
	MAX_MAX_NEW_TOKENS = 8_000
	DEFAULT_MAX_NEW_TOKENS = 4_000
	# Prompt budget in tokens; read from the environment, 4096 when the variable is unset.
	MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))

	# Markdown introduction rendered as the first element of the page layout.
	# The trailing backslash after the opening quotes keeps the text from
	# starting with an empty line.
	DESCRIPTION = """\
	# Philosophy Chat with Llama 3.1

	This Space showcases the Llama3.1-Instruct-SEP-Chat model from ruggsea, a fine-tuned instruction version of Meta's Llama 3.1 8B model, specifically tailored for philosophical discussions with a formal and informative tone. The model was trained using the Stanford Encyclopedia of Philosophy dataset and carefully crafted prompts.

	Feel free to engage in philosophical discussions and ask questions. The model supports multi-turn conversations and will maintain context.
	"""

	# Markdown footer rendered as the last element of the page layout; the
	# `---` line is a Markdown horizontal rule separating it from the chat.
	LICENSE = """
	<p/>

	---
	As a derivative work of Llama 3.1, this demo is governed by the original Meta license and acceptable use policy.
	"""

	# Load the model and tokenizer once at import time, and only when a CUDA
	# device is reported, because the weights are loaded 4-bit quantized.
	if torch.cuda.is_available():
	    # Imported here because only this branch needs it.
	    from transformers import BitsAndBytesConfig

	    model_id = "ruggsea/Llama3.1-Instruct-SEP-Chat"
	    model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        device_map="auto",
	        # Passing `load_in_4bit=True` directly to `from_pretrained` is the
	        # deprecated spelling; the quantization settings belong in a
	        # `BitsAndBytesConfig` handed over as `quantization_config`.
	        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
	    )
	    tokenizer = AutoTokenizer.from_pretrained(model_id)
	    tokenizer.use_default_system_prompt = False
	else:
	    import warnings

	    # Without this branch `model` and `tokenizer` are silently left
	    # undefined and the first chat request fails with a bare NameError.
	    warnings.warn(
	        "CUDA is not available: the model and tokenizer were not loaded, "
	        "so generate() cannot produce responses on this host.",
	        RuntimeWarning,
	    )

	def _history_to_messages(chat_history):
	    """Return *chat_history* as a flat list of role/content message dicts.

	    Turns that are already dicts are passed through untouched. Any other
	    turn is treated as a ``(user, assistant)`` pair and expanded into up to
	    two messages; a ``None`` half (no text on that side) is skipped.
	    """
	    messages = []
	    for turn in chat_history:
	        if isinstance(turn, dict):
	            messages.append(turn)
	            continue
	        user_text, assistant_text = turn
	        if user_text is not None:
	            messages.append({"role": "user", "content": user_text})
	        if assistant_text is not None:
	            messages.append({"role": "assistant", "content": assistant_text})
	    return messages


	@spaces.GPU
	def generate(
	    message: str,
	    chat_history: list[dict],
	    system_prompt: str = "",
	    max_new_tokens: int = 4000,
	    temperature: float = 0.7,
	    top_p: float = 0.9,
	    top_k: int = 50,
	    repetition_penalty: float = 1.1,
	) -> Iterator[str]:
	    """Stream the model's reply to *message*.

	    Args:
	        message: The new user message.
	        chat_history: Earlier turns of the conversation. Message dicts
	            (``{"role": ..., "content": ...}``) are used as-is;
	            ``(user, assistant)`` pairs are also accepted, because the chat
	            interface is created without an explicit history format.
	        system_prompt: Optional system message placed first; skipped when empty.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        temperature: Sampling temperature.
	        top_p: Nucleus-sampling probability mass.
	        top_k: Number of highest-probability tokens kept for sampling.
	        repetition_penalty: Penalty applied to repeated tokens.

	    Yields:
	        The response text accumulated so far, once per streamed chunk.
	    """
	    conversation = []
	    if system_prompt:
	        conversation.append({"role": "system", "content": system_prompt})
	    conversation.extend(_history_to_messages(chat_history))
	    conversation.append({"role": "user", "content": message})

	    # add_generation_prompt=True makes the template end with the opening of
	    # an assistant turn, so the model answers instead of having to produce
	    # the turn header itself.
	    input_ids = tokenizer.apply_chat_template(
	        conversation,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    )
	    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
	        # Keep the most recent tokens; the oldest part of the prompt is dropped.
	        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
	        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
	    input_ids = input_ids.to(model.device)

	    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        streamer=streamer,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        top_p=top_p,
	        top_k=top_k,
	        temperature=temperature,
	        num_beams=1,
	        repetition_penalty=repetition_penalty,
	    )
	    # Generation runs in a worker thread so this generator can consume the
	    # streamer and yield partial text while tokens are still being produced.
	    t = Thread(target=model.generate, kwargs=generate_kwargs)
	    t.start()

	    outputs = []
	    for text in streamer:
	        outputs.append(text)
	        yield "".join(outputs)

	# Initial text of the editable "System prompt" box.
	_SYSTEM_PROMPT = "You are a knowledgeable philosophy professor using the Stanford Encyclopedia of Philosophy as your knowledge base. Provide clear, accurate responses using markdown formatting. Focus on philosophical concepts and maintain academic rigor while being accessible. Always cite relevant philosophers and concepts."

	# One row per slider: (label, minimum, maximum, step, initial value).
	# Rows are listed in the same order as generate()'s sampling parameters.
	_SLIDER_SPECS = (
	    ("Max new tokens", 1, MAX_MAX_NEW_TOKENS, 1, DEFAULT_MAX_NEW_TOKENS),
	    ("Temperature", 0.1, 4.0, 0.1, 0.7),
	    ("Top-p (nucleus sampling)", 0.05, 1.0, 0.05, 0.9),
	    ("Top-k", 1, 1000, 1, 50),
	    ("Repetition penalty", 1.0, 2.0, 0.05, 1.1),
	)

	# Starter questions offered to the user; each becomes a one-element example row.
	_EXAMPLE_QUESTIONS = (
	    "What is the trolley problem and what are its main ethical implications?",
	    "Can you explain Plato's Theory of Forms?",
	    "What is the difference between analytic and continental philosophy?",
	    "How does Kant's Categorical Imperative work?",
	    "What is the problem of consciousness in philosophy of mind?",
	)

	# Chat UI wired to generate(): the system-prompt box comes first, followed
	# by the sampling sliders built from the table above.
	chat_interface = gr.ChatInterface(
	    fn=generate,
	    additional_inputs=[
	        gr.Textbox(label="System prompt", lines=6, value=_SYSTEM_PROMPT),
	        *(
	            gr.Slider(label=label, minimum=low, maximum=high, step=step, value=initial)
	            for label, low, high, step, initial in _SLIDER_SPECS
	        ),
	    ],
	    stop_btn=None,
	    examples=[[question] for question in _EXAMPLE_QUESTIONS],
	    cache_examples=False,
	)

	# Page layout, top to bottom: description, duplicate button, chat, license note.
	# NOTE(review): depending on the installed Gradio version, `css` may expect
	# CSS code, not a file path - confirm that "style.css" is actually applied.
	demo = gr.Blocks(css="style.css", fill_height=True)
	with demo:
	    gr.Markdown(DESCRIPTION)
	    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
	    # The chat interface was built earlier; render() places it at this position.
	    chat_interface.render()
	    gr.Markdown(LICENSE)

	# Script entry point: enable the request queue (size limit 20), then start the app.
	if __name__ == "__main__":
	    queued_demo = demo.queue(max_size=20)
	    queued_demo.launch()