FlawedLLM committed on
Commit bc816b1 · verified · 1 Parent(s): 84092dc

Update app.py

Files changed (1): app.py +2 -1
app.py CHANGED
@@ -13,7 +13,8 @@ tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_c
 model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
 def quantize_model(model):
     for name, module in model.named_modules():
-        if isinstance(module, torch.nn.Linear) and not isinstance(module, torch.nn.Linear4bit):
+        # Quantize only Linear layers that haven't already been quantized
+        if isinstance(module, torch.nn.Linear) and not isinstance(module, Linear4bit):
             module = quantize_blockwise(module)
 
 # Quantize the model (modified)
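
For context, here is a minimal, self-contained sketch of the pattern the fixed line relies on. `bitsandbytes.nn.Linear4bit` and `bitsandbytes.functional.quantize_blockwise` are real bitsandbytes APIs (and `Linear4bit` subclasses `torch.nn.Linear`, which is why the double isinstance check is needed), but the helper name `quantize_linear_weights` and the surrounding loop are illustrative assumptions, not the app's exact code:

import torch
import bitsandbytes as bnb
from bitsandbytes.nn import Linear4bit  # the name the patched line expects in scope

def quantize_linear_weights(model: torch.nn.Module):
    """Illustrative helper (not from app.py): blockwise-quantize the weights
    of plain Linear layers, skipping layers that are already 4-bit."""
    quantized = {}
    for name, module in model.named_modules():
        # Same guard as the patched line: only plain torch.nn.Linear,
        # never an already-quantized bitsandbytes Linear4bit.
        if isinstance(module, torch.nn.Linear) and not isinstance(module, Linear4bit):
            # quantize_blockwise operates on tensors, so quantize the weight;
            # it returns the packed tensor plus the state needed to dequantize.
            packed, state = bnb.functional.quantize_blockwise(module.weight.data)
            quantized[name] = (packed, state)
    return quantized

One design note on the sketch: because `quantize_blockwise` works on tensors and returns a `(packed, quant_state)` pair, the helper collects those results in a dict rather than rebinding the loop variable, since rebinding `module` inside `named_modules()` does not replace the layer held by the model.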