FlawedLLM committed
Commit eb4afbe · verified · 1 Parent(s): bc816b1

Update app.py

Files changed (1): app.py (+1, -10)
app.py CHANGED
@@ -6,19 +6,10 @@ import spaces
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from bitsandbytes.functional import quantize_blockwise
-from bitsandbytes.nn import Linear4bit
 
 tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
-model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
-def quantize_model(model):
-    for name, module in model.named_modules():
-        # Quantize only Linear layers that haven't already been quantized
-        if isinstance(module, torch.nn.Linear) and not isinstance(module, Linear4bit):
-            module = quantize_blockwise(module)
+model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final", load_in_4bit=True, device_map="auto")
 
-# Quantize the model (modified)
-quantize_model(model)
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):
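For context, the removed quantize_model helper could not have quantized anything: bitsandbytes.functional.quantize_blockwise operates on tensors rather than nn.Linear modules, and rebinding the loop variable module never mutates the model. The replacement delegates quantization to transformers instead. Note that recent transformers releases deprecate passing load_in_4bit=True directly to from_pretrained in favor of an explicit BitsAndBytesConfig; a minimal sketch of the equivalent load (the compute dtype here is an assumption, not taken from the diff):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "FlawedLLM/Bhashini_gemma_merged4bit_clean_final"

# Explicit 4-bit config; equivalent to load_in_4bit=True without the deprecated kwarg.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumed; bfloat16 also works on Ampere+ GPUs
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers across available devices
)

If the checkpoint was serialized together with its quantization settings (the repo name suggests a merged 4-bit save), from_pretrained may apply them automatically, making the explicit config a safe default rather than a strict requirement.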