import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random

# Download the quantized model from the Hugging Face Hub (cached locally
# after the first run) and initialize llama.cpp
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)
llm = Llama(
    model_path=model_path,
    n_ctx=2048,             # context window size in tokens
    n_threads=4,            # CPU threads used for inference
    chat_format="llama-3",  # apply the Llama 3 chat template
    seed=42,
    f16_kv=True,            # half-precision key/value cache
    logits_all=False,
    use_mmap=True,          # memory-map the model file
    n_gpu_layers=-1         # offload all layers to the GPU when one is available
)
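
# Optional sanity check (an assumption, not part of the original app): a
# non-streaming call returns the reply under choices[0]["message"]["content"].
# Uncomment to verify the model loads and responds before launching the UI.
# print(llm.create_chat_completion(
#     messages=[{"role": "user", "content": "What is a pulsar?"}],
#     max_tokens=32,
# )["choices"][0]["message"]["content"])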

# Greetings used to seed the chat window when the app starts
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]

def respond_stream(message, history):
    """Build the chat prompt from the history and stream the model's reply."""
    if not message:
        return
    system_message = (
        "Assume the role of AstroSage, a helpful chatbot designed to answer "
        "user queries about astronomy, astrophysics, and cosmology."
    )
    messages = [{"role": "system", "content": system_message}]
    # Rebuild the conversation; skip user slots that are None (the seeded
    # greeting turn has no user side)
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    try:
        past_tokens = ""  # accumulate and yield all tokens so far
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stream=True
        ):
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                past_tokens += delta["content"]
                yield past_tokens  # yield the accumulated response to allow streaming
    except Exception as e:
        yield f"Error during generation: {e}"

# Seed the chat with a random greeting; the component is created here and
# rendered inside the layout below
initial_message = random.choice(GREETING_MESSAGES)
chatbot = gr.Chatbot([[None, initial_message]], height=750)

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=4):
            chatbot.render()
            msg = gr.Textbox(
                show_label=False,
                placeholder="Ask about astronomy, astrophysics, or cosmology...",
            )
        with gr.Column(scale=1):
            clear = gr.Button("Clear")
    # Append the user's message, then stream the model's reply into the chat
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_reply, chatbot, chatbot
    )
    clear.click(lambda: [], None, chatbot, queue=False)

demo.queue().launch()
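
# To run locally (assuming this file is saved as app.py):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py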