FlawedLLM committed (verified)
Commit bdb7dc3 · Parent: 5bb86c6

Update app.py

Files changed (1): app.py (+10 -10)
app.py CHANGED
@@ -8,11 +8,11 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from huggingface_hub import login, HfFolder
 # tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final", trust_remote_code=True)
-# quantization_config = BitsAndBytesConfig(
-#     load_in_4bit=True,
-#     bnb_4bit_use_double_quant=True,
-#     bnb_4bit_quant_type="nf4",
-#     bnb_4bit_compute_dtype=torch.float16)
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16)
 # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final",
 #                                              device_map="auto",
 #                                              quantization_config=quantization_config,
@@ -31,14 +31,14 @@ from huggingface_hub import login, HfFolder
 # model = PeftModel.from_pretrained(model, lora_weights)
 # Load model directly
 # from transformers import AutoTokenizer, AutoModelForCausalLM
-bnb_config = BitsAndBytesConfig(
-    load_in_8bit=True,
-    llm_int8_threshold=6.0,
-)
+# bnb_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     llm_int8_threshold=6.0,
+# )
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final",
                                              device_map="auto",
-                                             quantization_config=bnb_config,)
+                                             quantization_config=quantization_config,)
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):
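In short, the commit activates the previously commented-out 4-bit NF4 BitsAndBytesConfig, comments out the 8-bit LLM.int8 config that had been in use, and points from_pretrained at the renamed quantization_config. For reference, a minimal standalone sketch of the load path app.py ends up with; it assumes a CUDA GPU with bitsandbytes installed, and the smoke test at the end is illustrative, not part of the commit:

# Sketch of the 4-bit load path this commit switches to (assumes a CUDA GPU
# with bitsandbytes installed; repo id copied from the diff above).
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.float16,  # dequantize to fp16 for matmuls
)

tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
model = AutoModelForCausalLM.from_pretrained(
    "FlawedLLM/Bhashini_gemma_merged4bit_clean_final",
    device_map="auto",
    quantization_config=quantization_config,
)

# Hypothetical smoke test, not part of app.py:
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))

The @spaces.GPU(duration=300) decorator visible in the surrounding context is the Hugging Face Spaces ZeroGPU API; it requests a GPU for up to 300 seconds per call of the decorated function (here chunk_it), which appears to be why the quantized load is kept at module level rather than inside the handler.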