Tijmen2 committed on
Commit
9780084
·
verified ·
1 Parent(s): 9baf1c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -22
app.py CHANGED
@@ -4,11 +4,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
4
  import torch
5
  import random
6
 
7
- MODEL_NAME = "AstroMLab/AstroSage-8B"
8
-
9
- model = None
10
- tokenizer = None
11
- streamer = None # these will be initialized the first time the bot function runs
 
 
 
 
12
 
13
  # Placeholder responses for when context is empty
14
  GREETING_MESSAGES = [
@@ -27,23 +31,6 @@ def user(user_message, history):
27
  @spaces.GPU(duration=20)
28
  def bot(history):
29
  """Generate the chatbot response."""
30
- global model, tokenizer, streamer
31
-
32
- if not model:
33
- # initialize the LLM
34
-
35
- # Load the tokenizer
36
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
37
-
38
- # Load the model with 8-bit quantization using bitsandbytes
39
- model = AutoModelForCausalLM.from_pretrained(
40
- MODEL_NAME,
41
- torch_dtype=torch.bfloat16,
42
- load_in_8bit=True, # Enable 8-bit quantization
43
- device_map="auto" # Automatically assign layers to available GPUs
44
- )
45
-
46
- streamer = TextStreamer(tokenizer)
47
 
48
  if not history:
49
  history = []
 
4
  import torch
5
  import random
6
 
7
+ model_name = "AstroMLab/AstroSage-8B"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ streamer = TextStreamer(tokenizer)
10
+ # Load the model with 8-bit quantization using bitsandbytes
11
+ model = AutoModelForCausalLM.from_pretrained(
12
+ model_name,
13
+ torch_dtype=torch.bfloat16,
14
+ load_in_8bit=True,
15
+ )
16
 
17
  # Placeholder responses for when context is empty
18
  GREETING_MESSAGES = [
 
31
  @spaces.GPU(duration=20)
32
  def bot(history):
33
  """Generate the chatbot response."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  if not history:
36
  history = []