File size: 2,435 Bytes
be1aa47 c94cc88 11d7701 c94cc88 11d7701 c94cc88 be1aa47 c94cc88 be1aa47 11d7701 be1aa47 11d7701 ca35e53 abe401d fe25716 abe401d be1aa47 fe25716 abe401d fe25716 be1aa47 ca35e53 96edac1 abe401d fe25716 96edac1 ca35e53 fe25716 abe401d 96edac1 fe25716 6a2645a abe401d be1aa47 fe25716 abe401d 6a2645a abe401d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
# Initialize model
model_path = hf_hub_download(
repo_id="AstroMLab/AstroSage-8B-GGUF",
filename="AstroSage-8B-Q8_0.gguf"
)
llm = Llama(
model_path=model_path,
n_ctx=2048,
n_threads=4,
chat_format="llama-3",
seed=42,
f16_kv=True,
logits_all=False,
use_mmap=True,
use_gpu=True
)
# Placeholder responses for when context is empty
GREETING_MESSAGES = [
"Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
"Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
"AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
"The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]
def respond_stream(message, history):
if not message:
return
system_message = "Assume the role of AstroSage, a helpful chatbot designed to answer user queries about astronomy, astrophysics, and cosmology."
messages = [{"role": "system", "content": system_message}]
for user, assistant in history:
messages.append({"role": "user", "content": user})
if assistant:
messages.append({"role": "assistant", "content": assistant})
messages.append({"role": "user", "content": message})
try:
past_tokens = "" # Accumulate and yield all tokens so far
for chunk in llm.create_chat_completion(
messages=messages,
max_tokens=512,
temperature=0.7,
top_p=0.9,
stream=True
):
delta = chunk["choices"][0]["delta"]
if "content" in delta:
new_tokens = delta["content"]
past_tokens += new_tokens
yield past_tokens # Yield the accumulated response to allow streaming
except Exception as e:
yield f"Error during generation: {e}"
initial_message = random.choice(GREETING_MESSAGES)
chatbot = gr.Chatbot([[None, initial_message]]).style(height=750) # Set height
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=0.8):
chatbot.render()
with gr.Column(scale=0.2):
clear = gr.Button("Clear")
clear.click(lambda: [], None, chatbot,queue=False)
demo.queue().launch()
|