Tijmen2 commited on
Commit
5a18dfb
·
verified ·
1 Parent(s): 6288ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -161
app.py CHANGED
@@ -1,39 +1,33 @@
1
- import os
2
- import requests
3
- import subprocess
4
- import json
5
- import time
6
- import spaces
7
  import gradio as gr
8
  import random
9
- from typing import List, Optional, Tuple, Dict
10
-
11
- # Constants for the llama.cpp server
12
- API_PATH_HEALTH = "/health"
13
- API_PATH_COMPLETIONS = "/chat/completions"
14
- LLAMA_CPP_SERVER_BASE = "http://127.0.0.1:8080"
15
- LLAMA_CPP_SERVER_START_TIMEOUT = 50 # seconds
16
- MODEL_FILENAME = "AstroSage-8B-Q8_0.gguf"
17
- HF_MODEL_ID = "AstroMLab/AstroSage-8B-GGUF"
18
-
19
- # Ensure the model is available
20
- if not os.path.exists(MODEL_FILENAME):
21
- url = f"https://huggingface.co/{HF_MODEL_ID}/resolve/main/{MODEL_FILENAME}"
22
- subprocess.check_call(["curl", "-o", MODEL_FILENAME, "-L", url])
23
-
24
- if not os.path.exists("llama-server"):
25
- subprocess.check_call("curl -o llama-server -L https://ngxson-llamacpp-builder.hf.space/llama-server", shell=True)
26
- subprocess.check_call("chmod +x llama-server", shell=True)
27
-
28
- # Roles and History Types
29
- class Role:
30
- SYSTEM = "system"
31
- USER = "user"
32
- ASSISTANT = "assistant"
33
-
34
- History = List[Dict[str, str]] # Chat history with "role" and "content"
35
-
36
- # Placeholder greeting messages
37
  GREETING_MESSAGES = [
38
  "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
39
  "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
@@ -41,54 +35,80 @@ GREETING_MESSAGES = [
41
  "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
42
  ]
43
 
44
- # Helper functions
45
- def wait_until_llamacpp_ready():
46
- """Wait until the llama.cpp server is ready."""
47
- trials = 0
48
- while trials < LLAMA_CPP_SERVER_START_TIMEOUT:
49
- try:
50
- response = requests.get(LLAMA_CPP_SERVER_BASE + API_PATH_HEALTH)
51
- if response.status_code == 200:
52
- return
53
- except requests.exceptions.RequestException:
54
- pass
55
- time.sleep(1)
56
- trials += 1
57
- raise TimeoutError("llama.cpp server did not start in time.")
58
-
59
- def initial_greeting() -> History:
60
- """Generate the initial greeting from the assistant."""
61
- return [{"role": "assistant", "content": random.choice(GREETING_MESSAGES)}]
62
 
63
- def send_request_to_llama(query: str, history: History) -> str:
64
- """Send a chat request to the llama.cpp server."""
65
- messages = [{"role": Role.SYSTEM, "content": "You are AstroSage, an AI assistant specializing in astronomy, astrophysics, and cosmology."}]
66
- messages.extend(history)
67
- messages.append({"role": Role.USER, "content": query})
68
-
69
- headers = {"Content-Type": "application/json"}
70
- data = {"temperature": 0.7, "messages": messages, "stream": True}
71
- response = requests.post(LLAMA_CPP_SERVER_BASE + API_PATH_COMPLETIONS, headers=headers, json=data, stream=True)
72
- response.raise_for_status()
73
-
74
- response_text = ""
75
- for line in response.iter_lines():
76
- line = line.decode("utf-8")
77
- if line.startswith("data: ") and not line.endswith("[DONE]"):
78
- data = json.loads(line[len("data: "):])
79
- response_text += data["choices"][0]["delta"].get("content", "")
80
- return response_text
81
-
82
- @spaces.GPU
83
- def bot(history: Optional[History]) -> History:
84
- """Generate the assistant's response."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  if history is None:
86
  history = []
87
- query = history[-1]["content"]
88
-
89
- response = send_request_to_llama(query, history[:-1])
90
- history.append({"role": "assistant", "content": response})
91
- return history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # Custom CSS for a space theme
94
  custom_css = """
@@ -105,91 +125,74 @@ custom_css = """
105
  }
106
  """
107
 
108
- # Launch llama.cpp server
109
- llama_proc = subprocess.Popen([
110
- "./llama-server"
111
- ], env=dict(
112
- os.environ,
113
- LLAMA_HOST="0.0.0.0",
114
- LLAMA_PORT="8080",
115
- LLAMA_ARG_CTX_SIZE=str(2048),
116
- LLAMA_ARG_MODEL=MODEL_FILENAME,
117
- LLAMA_ARG_N_GPU_LAYERS="9999",
118
- ))
119
-
120
- try:
121
- wait_until_llamacpp_ready()
122
-
123
- # Create the Gradio interface
124
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")) as demo:
125
- gr.Markdown(
126
- """
127
- # 🌌 AstroSage: Your Cosmic AI Companion
128
-
129
- Welcome to AstroSage, an advanced AI assistant specializing in astronomy, astrophysics, and cosmology.
130
- Powered by the AstroSage-Llama-3.1-8B model, I'm here to help you explore the wonders of the universe!
131
-
132
- ### What Can I Help You With?
133
- - πŸͺ Explanations of astronomical phenomena
134
- - πŸš€ Space exploration and missions
135
- - ⭐ Stars, galaxies, and cosmology
136
- - 🌍 Planetary science and exoplanets
137
- - πŸ“Š Astrophysics concepts and theories
138
- - πŸ”­ Astronomical instruments and observations
139
-
140
- Just type your question below and let's embark on a cosmic journey together!
141
- """
142
- )
143
 
144
- chatbot = gr.Chatbot(
145
- label="Chat with AstroSage",
146
- bubble_full_width=False,
147
- show_label=True,
148
- height=450,
149
- type="messages"
150
- )
151
 
152
- with gr.Row():
153
- msg = gr.Textbox(
154
- label="Type your message here",
155
- placeholder="Ask me anything about space and astronomy...",
156
- scale=9
157
- )
158
- clear = gr.Button("Clear Chat", scale=1)
159
 
160
- # Example questions for quick start
161
- gr.Examples(
162
- examples=[
163
- "What is a black hole and how does it form?",
164
- "Can you explain the life cycle of a star?",
165
- "What are exoplanets and how do we detect them?",
166
- "Tell me about the James Webb Space Telescope.",
167
- "What is dark matter and why is it important?"
168
- ],
169
- inputs=msg,
170
- label="Example Questions"
171
- )
172
-
173
- # Set up the message chain
174
- msg.submit(
175
- lambda x, y: (x, y + [{"role": "user", "content": x}]),
176
- [msg, chatbot],
177
- [msg, chatbot],
178
- queue=False
179
- ).then(
180
- bot,
181
- chatbot,
182
- chatbot
183
  )
184
-
185
- # Clear button functionality
186
- clear.click(lambda: None, None, chatbot, queue=False)
187
-
188
- # Initial greeting
189
- demo.load(initial_greeting, None, chatbot, queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
- # Launch the app
 
192
  demo.launch()
193
-
194
- finally:
195
- llama_proc.kill()
 
1
+ from threading import Thread
 
 
 
 
 
2
  import gradio as gr
3
  import random
4
+ import torch
5
+ from transformers import (
6
+ AutoModelForCausalLM,
7
+ AutoTokenizer,
8
+ AutoConfig,
9
+ TextIteratorStreamer
10
+ )
11
+
12
+ # Constants for the model and configuration
13
+ MODEL_ID = "universeTBD/astrollama"
14
+ WINDOW_SIZE = 4096
15
+ DEVICE = "cuda"
16
+
17
+ # Load model configuration, tokenizer, and model
18
+ config = AutoConfig.from_pretrained(pretrained_model_name_or_path=MODEL_ID)
19
+ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_ID)
20
+ model = AutoModelForCausalLM.from_pretrained(
21
+ pretrained_model_name_or_path=MODEL_ID,
22
+ config=config,
23
+ device_map="auto",
24
+ use_safetensors=True,
25
+ trust_remote_code=True,
26
+ load_in_4bit=True,
27
+ torch_dtype=torch.bfloat16
28
+ )
29
+
30
+ # Placeholder responses for when context is empty
 
31
  GREETING_MESSAGES = [
32
  "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
33
  "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
 
35
  "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
36
  ]
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95, top_k=50):
40
+ """
41
+ Generate a response using the transformer model.
42
+ """
43
+ # Combine history into the prompt
44
+ formatted_history = "\n".join([f"{msg['role']}: {msg['content']}" for msg in history])
45
+ prompt_with_history = f"{formatted_history}\nUser: {prompt}\nAssistant:"
46
+
47
+ # Encode the prompt
48
+ inputs = tokenizer([prompt_with_history], return_tensors="pt", truncation=True).to(DEVICE)
49
+ input_length = inputs["input_ids"].shape[-1]
50
+ max_new_tokens = min(max_new_tokens, WINDOW_SIZE - input_length)
51
+
52
+ # Prepare text streamer for live updates
53
+ streamer = TextIteratorStreamer(
54
+ tokenizer=tokenizer,
55
+ timeout=10.0,
56
+ skip_prompt=True,
57
+ skip_special_tokens=True
58
+ )
59
+ generation_kwargs = dict(
60
+ **inputs,
61
+ streamer=streamer,
62
+ max_new_tokens=max_new_tokens,
63
+ do_sample=True,
64
+ top_p=top_p,
65
+ top_k=top_k,
66
+ temperature=temperature,
67
+ num_beams=1,
68
+ )
69
+
70
+ # Generate the response in a separate thread for streaming
71
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
72
+ thread.start()
73
+
74
+ # Collect and return the response
75
+ response = ""
76
+ for new_text in streamer:
77
+ response += new_text
78
+ yield response
79
+
80
+
81
+ def user(user_message, history):
82
+ """
83
+ Add the user's message to the history.
84
+ """
85
  if history is None:
86
  history = []
87
+ return "", history + [{"role": "user", "content": user_message}]
88
+
89
+
90
+ def bot(history):
91
+ """
92
+ Generate the bot's response based on the history.
93
+ """
94
+ if not history:
95
+ history = [{"role": "assistant", "content": random.choice(GREETING_MESSAGES)}]
96
+ last_user_message = history[-1]["content"] if history else ""
97
+ response_generator = generate_text(last_user_message, history)
98
+ history.append({"role": "assistant", "content": ""})
99
+
100
+ # Stream the response back
101
+ for partial_response in response_generator:
102
+ history[-1]["content"] = partial_response
103
+ yield history
104
+
105
+
106
+ def initial_greeting():
107
+ """
108
+ Return the initial greeting message.
109
+ """
110
+ return [{"role": "assistant", "content": random.choice(GREETING_MESSAGES)}]
111
+
112
 
113
  # Custom CSS for a space theme
114
  custom_css = """
 
125
  }
126
  """
127
 
128
+ # Create the Gradio interface
129
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")) as demo:
130
+ gr.Markdown(
131
+ """
132
+ # 🌌 AstroSage: Your Cosmic AI Companion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
+ Welcome to AstroSage, an advanced AI assistant specializing in astronomy, astrophysics, and cosmology.
135
+ Powered by the AstroSage-Llama-3.1-8B model, I'm here to help you explore the wonders of the universe!
 
 
 
 
 
136
 
137
+ ### What Can I Help You With?
138
+ - πŸͺ Explanations of astronomical phenomena
139
+ - πŸš€ Space exploration and missions
140
+ - ⭐ Stars, galaxies, and cosmology
141
+ - 🌍 Planetary science and exoplanets
142
+ - πŸ“Š Astrophysics concepts and theories
143
+ - πŸ”­ Astronomical instruments and observations
144
 
145
+ Just type your question below and let's embark on a cosmic journey together!
146
+ """
147
+ )
148
+
149
+ chatbot = gr.Chatbot(
150
+ label="Chat with AstroSage",
151
+ bubble_full_width=False,
152
+ show_label=True,
153
+ height=450,
154
+ type="messages"
155
+ )
156
+
157
+ with gr.Row():
158
+ msg = gr.Textbox(
159
+ label="Type your message here",
160
+ placeholder="Ask me anything about space and astronomy...",
161
+ scale=9
 
 
 
 
 
 
162
  )
163
+ clear = gr.Button("Clear Chat", scale=1)
164
+
165
+ # Example questions for quick start
166
+ gr.Examples(
167
+ examples=[
168
+ "What is a black hole and how does it form?",
169
+ "Can you explain the life cycle of a star?",
170
+ "What are exoplanets and how do we detect them?",
171
+ "Tell me about the James Webb Space Telescope.",
172
+ "What is dark matter and why is it important?"
173
+ ],
174
+ inputs=msg,
175
+ label="Example Questions"
176
+ )
177
+
178
+ # Set up the message chain with streaming
179
+ msg.submit(
180
+ user,
181
+ [msg, chatbot],
182
+ [msg, chatbot],
183
+ queue=False
184
+ ).then(
185
+ bot,
186
+ chatbot,
187
+ chatbot
188
+ )
189
+
190
+ # Clear button functionality
191
+ clear.click(lambda: None, None, chatbot, queue=False)
192
+
193
+ # Initial greeting
194
+ demo.load(initial_greeting, None, chatbot, queue=False)
195
 
196
+ # Launch the app
197
+ if __name__ == "__main__":
198
  demo.launch()