Tijmen2 committed
Commit 00e2bff · verified · 1 Parent(s): 198d160

Update app.py

Files changed (1)
  1. app.py +39 -40
app.py CHANGED
@@ -1,21 +1,19 @@
 import spaces
 import gradio as gr
-from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from huggingface_hub import hf_hub_download
+import torch
 import random
 
-model_path = hf_hub_download(
-    repo_id="AstroMLab/AstroSage-8B-GGUF",
-    filename="AstroSage-8B-Q8_0.gguf"
-)
-
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    chat_format="llama-3",
-    n_gpu_layers=50,  # ensure all layers are on GPU
-    flash_attn=True,
+# Load model and tokenizer from Hugging Face
+model_name = "AstroMLab/AstroSage-8B-GGUF"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto"
 )
+streamer = TextStreamer(tokenizer)
 
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
@@ -33,42 +31,43 @@ def user(user_message, history):
 
 @spaces.GPU(duration=20)
 def bot(history):
-    """Yield the chatbot response for streaming."""
+    """Generate the chatbot response."""
 
     if not history:
         history = []
-
-    # Prepare the messages for the model
-    messages = [
-        {
-            "role": "system",
-            "content": "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and cosmology. Provide accurate, scientific information while making complex concepts accessible. You're enthusiastic about space exploration and maintain a sense of wonder about the cosmos."
-        }
-    ]
-
-    # Add chat history
-    for message in history[:-1]:  # Exclude the last message which we just added
-        messages.append({"role": message["role"], "content": message["content"]})
-
-    # Add the current user message
-    messages.append({"role": "user", "content": history[-1]["content"]})
 
-    # Start generating the response
-    history.append({"role": "assistant", "content": ""})
+    # Prepare input prompt for the model
+    system_prompt = (
+        "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and cosmology. "
+        "Provide accurate, scientific information while making complex concepts accessible. "
+        "You're enthusiastic about space exploration and maintain a sense of wonder about the cosmos."
+    )
 
-    # Stream the response
-    response = llm.create_chat_completion(
-        messages=messages,
-        max_tokens=512,
+    # Construct the chat history as a single input string
+    prompt = system_prompt + "\n\n"
+    for message in history:
+        if message["role"] == "user":
+            prompt += f"User: {message['content']}\n"
+        else:
+            prompt += f"AstroSage: {message['content']}\n"
+    prompt += "AstroSage: "
+
+    # Generate response
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
         temperature=0.7,
        top_p=0.95,
-        stream=True,
+        do_sample=True,
+        streamer=streamer
    )
-
-    for chunk in response:
-        if chunk and "content" in chunk["choices"][0]["delta"]:
-            history[-1]["content"] += chunk["choices"][0]["delta"]["content"]
-            yield history
+
+    # Decode the generated output and update history
+    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response_text = response_text[len(prompt):].strip()
+    history.append({"role": "assistant", "content": response_text})
+    yield history
 
 def initial_greeting():
     """Return properly formatted initial greeting."""