ranamhamoud committed on
Commit
c11dcf8
·
verified ·
1 Parent(s): 55c5ebc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -30,12 +30,12 @@ if not torch.cuda.is_available():
30
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
31
 
32
  # Model and Tokenizer Configuration
33
- model_id = "meta-llama/Llama-2-7b-hf"
34
  bnb_config = BitsAndBytesConfig(
35
- load_in_8bit=True,
36
  bnb_4bit_use_double_quant=False,
37
- bnb_8bit_quant_type="nf8",
38
- bnb_8bit_compute_dtype=torch.bfloat16
39
  )
40
  base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
41
  model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
 
30
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
31
 
32
  # Model and Tokenizer Configuration
33
+ model_id = "meta-llama/Llama-2-7b-chat"
34
  bnb_config = BitsAndBytesConfig(
35
+ load_in_4bit=True,
36
  bnb_4bit_use_double_quant=False,
37
+ bnb_4bit_quant_type="nf4",
38
+ bnb_4bit_compute_dtype=torch.bfloat16
39
  )
40
  base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
41
  model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")