# Hugging Face Space: AstroMLab / AstroSage-8B
# Space status when this file was captured: Runtime error
# File size: 5,611 bytes

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random

# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
# Selectors:
#   .gradio-container  - dark vertical gradient behind the whole app
#   .header-text       - large centered title (used by the first gr.HTML block)
#   .subheader         - muted centered tagline under the title
#   .controls-section  - translucent rounded panel; no element in the layout
#                        shown in this file carries this class
#   .model-info        - footer card with model/build details
custom_css = """
    .gradio-container {
        background: linear-gradient(to bottom, #1a1a2e, #16213e) !important;
    }
    .header-text {
        text-align: center;
        color: #e2e8f0;
        font-size: 2.5em;
        font-weight: bold;
        margin: 1em 0;
        text-shadow: 0 0 10px rgba(255, 255, 255, 0.3);
    }
    .subheader {
        text-align: center;
        color: #94a3b8;
        font-size: 1.2em;
        margin-bottom: 2em;
    }
    .controls-section {
        background: rgba(255, 255, 255, 0.05);
        padding: 1.5em;
        border-radius: 10px;
        margin: 1em 0;
    }
    .model-info {
        background: rgba(0, 0, 0, 0.2);
        padding: 1em;
        border-radius: 8px;
        margin-top: 1em;
        color: #94a3b8;
    }
"""

# Initialize model.
# Fetch the 8-bit quantized GGUF weights from the Hugging Face Hub; the call
# returns the local filesystem path of the downloaded file.
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)

# Load the model once at import time; every request handler shares this
# single Llama instance.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,             # context window (prompt + generated tokens)
    n_threads=4,
    chat_format="llama-3",  # prompt template used by create_chat_completion
    seed=42,
    f16_kv=True,
    logits_all=False,
    use_mmap=True,
    # `use_gpu` is not a Llama constructor parameter: it was swallowed by the
    # constructor's catch-all keyword arguments and had no effect. GPU offload
    # is requested through `n_gpu_layers`; -1 offloads every layer when the
    # installed llama.cpp build has GPU support and is ignored on CPU-only
    # builds.
    n_gpu_layers=-1,
)

# Opening assistant lines shown while the conversation has no real turns yet.
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]


def get_random_greeting():
    """Return one entry of ``GREETING_MESSAGES`` chosen with ``random.choice``."""
    greeting = random.choice(GREETING_MESSAGES)
    return greeting

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The new user message; always appended as the final turn.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs. Falsy
            entries (``None`` or ``""``) on either side are left out of the
            prompt.
        system_message: Content of the leading system turn.
        max_tokens: Forwarded to ``llm.create_chat_completion``.
        temperature: Forwarded to ``llm.create_chat_completion``.
        top_p: Forwarded to ``llm.create_chat_completion``.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    conversation = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        # Emit the user side first, then the assistant side, skipping blanks.
        for role, content in (("user", user_msg), ("assistant", assistant_msg)):
            if content:
                conversation.append({"role": role, "content": content})
    conversation.append({"role": "user", "content": message})

    completion = llm.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def regenerate(message, history, system_message, max_tokens, temperature, top_p):
    """Produce a fresh reply for the most recent turn of the conversation.

    The last ``(user_msg, assistant_msg)`` pair is dropped from ``history``
    before the model is asked again. When ``message`` is empty -- the usual
    case, because the UI clears the textbox after every send -- the user
    message of the dropped pair is re-asked instead, so the model is not
    prompted with an empty question.

    Args:
        message: Text to answer. If falsy, the last user message found in
            ``history`` is used when there is one.
        history: List of ``(user_msg, assistant_msg)`` pairs; may be empty.
        system_message: Passed through to ``respond``.
        max_tokens: Passed through to ``respond``.
        temperature: Passed through to ``respond``.
        top_p: Passed through to ``respond``.

    Returns:
        Whatever ``respond`` returns for the rebuilt conversation.
    """
    last_user_msg = None
    if history:
        # Remember what the user asked before discarding the stale turn.
        last_user_msg, _ = history[-1]
        history = history[:-1]

    # An explicit message still wins, so existing callers are unaffected.
    if not message and last_user_msg:
        message = last_user_msg

    # Generate a new response
    return respond(message, history, system_message, max_tokens, temperature, top_p)

def clear_context():
    """Start a new chat: reset the conversation and empty the input box.

    The return value feeds the ``[chatbot, msg]`` outputs of the "New Chat"
    button. The greeting belongs in the chat history as an assistant-only
    turn (the same shape as the Chatbot's initial value), not in the user's
    textbox, where it would be submitted as if the user had typed it.

    Returns:
        A ``(chat_history, textbox_value)`` tuple: a one-turn history holding
        a random greeting, and an empty string for the textbox.
    """
    return [[None, get_random_greeting()]], ""

def _send_and_append(message, history, system_message, max_tokens, temperature, top_p):
    """Event handler for sending a message: return the updated chat history.

    ``respond`` returns only the reply text, while the Chatbot component holds
    a list of ``[user, assistant]`` pairs, so the reply is appended to the
    history as a new turn here. Blank messages leave the history unchanged.
    """
    turns = list(history or [])
    if not message or not message.strip():
        return turns
    reply = respond(message, turns, system_message, max_tokens, temperature, top_p)
    return turns + [[message, reply]]


def _regenerate_last_turn(history, system_message, max_tokens, temperature, top_p):
    """Event handler for "Regenerate": replace the last assistant reply.

    Re-asks the user message of the most recent turn and returns the history
    with that turn's reply swapped for the new one. Does nothing when there
    is no user turn to regenerate (empty chat, or only the greeting).
    """
    turns = list(history or [])
    if not turns:
        return turns
    last_user_msg, _ = turns[-1]
    if not last_user_msg:
        return turns
    # `regenerate` drops the stale last pair itself before asking the model.
    reply = regenerate(
        last_user_msg, turns, system_message, max_tokens, temperature, top_p
    )
    return turns[:-1] + [[last_user_msg, reply]]


# Build the UI: header, chat area, collapsible generation settings, action
# buttons, model-info footer, then the event wiring.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        """
        <div class="header-text">🌌 AstroSage-LLAMA-3.1-8B</div>
        <div class="subheader">Your AI Guide to the Cosmos</div>
        """
    )
    
    # The chat starts with a single assistant-only turn holding a greeting.
    chatbot = gr.Chatbot(
        value=[[None, get_random_greeting()]],
        height=400,
        show_label=False,
    )
    msg = gr.Textbox(
        placeholder="Ask about astronomy, astrophysics, or cosmology...",
        show_label=False,
    )
    
    with gr.Accordion("Advanced Settings", open=False) as advanced_settings:
        system_msg = gr.Textbox(
            value="You are AstroSage, a highly knowledgeable AI assistant specialized in astronomy, astrophysics, and cosmology. Provide accurate, engaging, and educational responses about space science and the universe.",
            label="System Message",
            lines=3
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p"
            )
    
    with gr.Row():
        clear = gr.Button("🌟 New Chat")
        regenerate_btn = gr.Button("🔄 Regenerate")
        submit = gr.Button("Send 🚀", variant="primary")
    
    gr.HTML(
        """
        <div class="model-info">
            <p>📚 Model: AstroSage-LLAMA-3.1-8B (8-bit Quantized)</p>
            <p>🔧 Built with llama.cpp, Gradio, and Python</p>
            <p>💫 Specialized in astronomy, astrophysics, and cosmology</p>
        </div>
        """
    )
    
    # Set up event handlers.
    # NOTE(review): every handler runs with queue=False; generation can be
    # slow, so confirm this does not hit request timeouts on the host.
    generation_inputs = [system_msg, max_tokens, temperature, top_p]

    # Pressing Enter in the textbox and clicking "Send" do the same thing:
    # append the new turn to the chat, then empty the textbox.
    msg.submit(
        _send_and_append,
        [msg, chatbot] + generation_inputs,
        [chatbot],
        queue=False
    ).then(
        lambda: "",
        None,
        [msg],
        queue=False
    )
    
    submit.click(
        _send_and_append,
        [msg, chatbot] + generation_inputs,
        [chatbot],
        queue=False
    ).then(
        lambda: "",
        None,
        [msg],
        queue=False
    )
    
    # Regenerate works from the chat history alone; the textbox has already
    # been emptied by the time the button can be clicked.
    regenerate_btn.click(
        _regenerate_last_turn,
        [chatbot] + generation_inputs,
        [chatbot],
        queue=False
    )
    
    clear.click(
        clear_context,
        None,
        [chatbot, msg],
        queue=False
    )

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()