Tijmen2 committed on
Commit
a65b868
·
verified ·
1 Parent(s): d17e177

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -1,18 +1,23 @@
1
  import spaces
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
4
- from huggingface_hub import hf_hub_download
5
  import torch
6
  import random
7
 
8
- # Load model and tokenizer from Hugging Face
9
- model_name = "AstroMLab/AstroSage-8B-GGUF"
 
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  model_name,
13
  torch_dtype=torch.float16,
14
- device_map="auto"
 
15
  )
 
16
  streamer = TextStreamer(tokenizer)
17
 
18
  # Placeholder responses for when context is empty
 
1
  import spaces
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 
4
  import torch
5
  import random
6
 
7
+ # Hugging Face model repository to load (quantized to 8-bit at load time below)
8
+ model_name = "AstroMLab/AstroSage-8B"
9
+
10
+ # Load the tokenizer
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+
13
+ # Load the model with 8-bit quantization using bitsandbytes
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_name,
16
  torch_dtype=torch.float16,
17
+ load_in_8bit=True, # Enable 8-bit quantization
18
+ device_map="auto" # Automatically place layers across the available devices
19
  )
20
+
21
  streamer = TextStreamer(tokenizer)
22
 
23
  # Placeholder responses for when context is empty