mlabonne commited on
Commit
8fa77d8
·
verified ·
1 Parent(s): cc449f6

Update app.py

Browse files
Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -68,14 +68,14 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
68
  # Load model
69
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
70
  quantization_config = BitsAndBytesConfig(
71
- load_in_4bit=True,
72
- bnb_4bit_compute_dtype=torch.bfloat16
73
  )
74
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
75
  model = AutoModelForCausalLM.from_pretrained(
76
  MODEL_ID,
77
  device_map="auto",
78
- # quantization_config=quantization_config,
79
  attn_implementation="flash_attention_2",
80
  )
81
 
 
68
  # Load model
69
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
70
  quantization_config = BitsAndBytesConfig(
71
+ load_in_8bit=True,
72
+ # bnb_4bit_compute_dtype=torch.bfloat16
73
  )
74
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
75
  model = AutoModelForCausalLM.from_pretrained(
76
  MODEL_ID,
77
  device_map="auto",
78
+ quantization_config=quantization_config,
79
  attn_implementation="flash_attention_2",
80
  )
81