Arturo Jiménez de los Galanes Reguillos committed on
Commit e93ccdc · 1 Parent(s): 90aa5a9

Add quantization

Files changed (1)
  1. app.py +9 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
 from threading import Thread
+import torch
 
 MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"
 
@@ -19,8 +20,15 @@ def messages_for(python):
         {"role": "user", "content": user_prompt_for(python)}
     ]
 
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4"
+)
+
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model = AutoModelForCausalLM.from_pretrained(MODEL)
+model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", quantization_config=quant_config)
 streamer = TextIteratorStreamer(tokenizer)
 
 cplusplus = None
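
For context, the sketch below shows how the 4-bit quantized model configured in this commit could drive streaming generation with the TextIteratorStreamer and Thread already imported in app.py. Only the imports, MODEL, quant_config, and the from_pretrained call come from the diff; the example messages, the apply_chat_template() call, skip_prompt=True, and the max_new_tokens value are illustrative assumptions, since the rest of app.py is not shown here.

# Minimal sketch (not part of the commit): streaming generation with the
# 4-bit quantized OpenCodeInterpreter model loaded as in the diff above.
from threading import Thread

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"

# 4-bit NF4 weights, double quantization, bfloat16 compute -- as in the commit.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL, device_map="auto", quantization_config=quant_config
)
# skip_prompt=True is an addition here so only newly generated text is streamed;
# the commit constructs the streamer as TextIteratorStreamer(tokenizer).
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# Hypothetical chat messages; in app.py they are built by messages_for(python).
messages = [{"role": "user", "content": "Rewrite this Python function in C++."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# generate() runs on a worker thread; the streamer yields decoded text chunks
# on the main thread as soon as they are produced.
thread = Thread(
    target=model.generate,
    kwargs=dict(inputs=inputs, streamer=streamer, max_new_tokens=512),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()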