Spaces:

AstroMLab
/

AstroSage

Sleeping

Tijmen2 committed on Nov 18, 2024

Commit

9780084

verified ·

1 Parent(s): 9baf1c4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,11 +4,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 import random
-MODEL_NAME = "AstroMLab/AstroSage-8B"
-model = None
-tokenizer = None
-streamer = None # these will be initialized the first time the bot function runs
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
@@ -27,23 +31,6 @@ def user(user_message, history):
 @spaces.GPU(duration=20)
 def bot(history):
     """Generate the chatbot response."""
-    global model, tokenizer, streamer
-    if not model:
-        # initialize the LLM
-        # Load the tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-        # Load the model with 8-bit quantization using bitsandbytes
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            torch_dtype=torch.bfloat16,
-            load_in_8bit=True,               # Enable 8-bit quantization
-            device_map="auto"                # Automatically assign layers to available GPUs
-        )
-        streamer = TextStreamer(tokenizer)
     if not history:
         history = []

 import torch
 import random
+model_name = "AstroMLab/AstroSage-8B"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+streamer = TextStreamer(tokenizer)
+# Load the model with 8-bit quantization using bitsandbytes
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    load_in_8bit=True,
+)
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
 @spaces.GPU(duration=20)
 def bot(history):
     """Generate the chatbot response."""
     if not history:
         history = []