import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
import spaces
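# `spaces` provides the @spaces.GPU decorator used by Hugging Face ZeroGPU Spaces
# to request GPU time for GPU-bound functions.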
import torch

# Get the number of available CPU cores
import multiprocessing
n_cores = multiprocessing.cpu_count()

# Initialize model with optimized parameters
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)
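# hf_hub_download stores the file in the local Hugging Face cache and returns
# the path to the downloaded GGUF weights.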
# Optimized LLaMA parameters for A100
llm = Llama(
    model_path=model_path,
    n_ctx=2048,            # Keep context window reasonable
    n_threads=n_cores,     # Use all available CPU cores
    n_batch=512,           # Increase batch size for faster processing
    n_gpu_layers=35,       # Offload more layers to GPU
    chat_format="llama-3",
    seed=42,
    f16_kv=True,           # Use FP16 for key/value cache
    logits_all=False,
    use_mmap=False,        # Disable memory mapping for faster loading
    use_gpu=True,
    tensor_split=None,     # Let the model handle tensor splitting
)
# Optimize CUDA settings if available
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True  # Allow TF32 for faster matrix multiplication
    torch.backends.cudnn.benchmark = True         # Enable cuDNN autotuner
# Greeting messages used to seed a new chat
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]
def user(user_message, history):
    """Add user message to chat history."""
    if history is None:
        history = []
    return "", history + [{"role": "user", "content": user_message}]
def bot(history):
    """Generate and stream the bot's response with optimized parameters."""
    if not history:
        history = []

    # Keep the prompt short: reserve roughly half of the 2048-token context for the
    # response and pass only the most recent turns to the model.
    max_history_tokens = 1024      # Response budget (trimming below is message-based, not token-based)
    recent_history = history[-5:]  # Keep only the last 5 messages for context
    # Prepare the messages for the model
    messages = [
        {
            "role": "system",
            "content": "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and space science. Be concise and direct in your responses while maintaining accuracy."
        }
    ]

    # Add the trimmed chat history (everything except the message being answered)
    for message in recent_history[:-1]:
        messages.append({"role": message["role"], "content": message["content"]})

    # Add the current user message
    messages.append({"role": "user", "content": history[-1]["content"]})

    # Append an empty assistant message that will be filled in as tokens stream back
    history.append({"role": "assistant", "content": ""})
    # Optimized streaming parameters
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        stream=True,
        top_k=40,            # Add top-k sampling
        repeat_penalty=1.1,  # Slight penalty for repetition
        mirostat_mode=2,     # Enable Mirostat 2.0 sampling
        mirostat_tau=5.0,    # Target entropy; lower values give more focused output
        mirostat_eta=0.1,    # Learning rate of the Mirostat controller
    )
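    # Stream the reply: each chunk carries a small "delta" of text that is appended
    # to the assistant message, and the updated history is yielded to Gradio.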
    for chunk in response:
        if chunk and "content" in chunk["choices"][0]["delta"]:
            history[-1]["content"] += chunk["choices"][0]["delta"]["content"]
            yield history
def initial_greeting():
    """Return a properly formatted initial greeting."""
    return [{"role": "assistant", "content": random.choice(GREETING_MESSAGES)}]
# Custom CSS for a space theme
custom_css = """
#component-0 {
    background-color: #1a1a2e;
    border-radius: 15px;
    padding: 20px;
}
.dark {
    background-color: #0f0f1a;
}
.contain {
    max-width: 1200px !important;
}
"""
# Create the Gradio interface with optimized queue settings
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")) as demo:
    gr.Markdown(
        """
# AstroSage: Your Cosmic AI Companion

Welcome to AstroSage, an advanced AI assistant specializing in astronomy, astrophysics, and cosmology.
Powered by the AstroSage-8B model, I'm here to help you explore the wonders of the universe!

### What Can I Help You With?

- Explanations of astronomical phenomena
- Space exploration and missions
- Stars, galaxies, and cosmology
- Planetary science and exoplanets
- Astrophysics concepts and theories
- Astronomical instruments and observations

Just type your question below and let's embark on a cosmic journey together!
"""
    )
    chatbot = gr.Chatbot(
        label="Chat with AstroSage",
        bubble_full_width=False,
        show_label=True,
        height=450,
        type="messages"
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Type your message here",
            placeholder="Ask me anything about space and astronomy...",
            scale=9
        )
        clear = gr.Button("Clear Chat", scale=1)
    # Example questions for quick start
    gr.Examples(
        examples=[
            "What is a black hole and how does it form?",
            "Can you explain the life cycle of a star?",
            "What are exoplanets and how do we detect them?",
            "Tell me about the James Webb Space Telescope.",
            "What is dark matter and why is it important?"
        ],
        inputs=msg,
        label="Example Questions"
    )
    # Set up the message chain: add the user message without queuing,
    # then stream the bot's reply through the queue
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        chatbot,
        chatbot,
        queue=True  # Enable queuing for the streamed bot response
    )
    # Clear button functionality
    clear.click(lambda: None, None, chatbot, queue=False)

    # Initial greeting
    demo.load(initial_greeting, None, chatbot, queue=False)
# Launch the app with optimized settings
if __name__ == "__main__":
    demo.queue(default_concurrency_limit=2)  # Allow up to 2 concurrent generations
    demo.launch()