ubermenchh committed on
Commit
60d726a
·
1 Parent(s): cde4903

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -59,7 +59,7 @@ for i in range(0, len(data), batch_size):
59
  ]
60
  index.upsert(vectors=zip(ids, embeds, metadata))
61
 
62
- model_id = 'meta-llama/Llama-2-7b-chat-hf'
63
  hf_auth = os.environ.get('HF_AUTH_KEY')
64
 
65
  bnb_config = transformers.BitsAndBytesConfig(
@@ -67,7 +67,6 @@ bnb_config = transformers.BitsAndBytesConfig(
67
  bnb_4bit_quant_type='nf4',
68
  bnb_4bit_use_double_quant=True,
69
  bnb_4bit_compute_dtype=bfloat16,
70
- load_in_8bit_fp32_cpu_offload=True
71
  )
72
  model_config = transformers.AutoConfig.from_pretrained(model_id, use_auth_token=hf_auth)
73
  model = transformers.AutoModelForCausalLM.from_pretrained(
 
59
  ]
60
  index.upsert(vectors=zip(ids, embeds, metadata))
61
 
62
+ model_id = 'Trelis/Llama-2-7b-chat-hf-sharded-bf16'
63
  hf_auth = os.environ.get('HF_AUTH_KEY')
64
 
65
  bnb_config = transformers.BitsAndBytesConfig(
 
67
  bnb_4bit_quant_type='nf4',
68
  bnb_4bit_use_double_quant=True,
69
  bnb_4bit_compute_dtype=bfloat16,
 
70
  )
71
  model_config = transformers.AutoConfig.from_pretrained(model_id, use_auth_token=hf_auth)
72
  model = transformers.AutoModelForCausalLM.from_pretrained(