Spaces:
Runtime error
Runtime error
FlawedLLM
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -6,19 +6,10 @@ import spaces
|
|
6 |
import gradio as gr
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
-
from bitsandbytes.functional import quantize_blockwise
|
10 |
-
from bitsandbytes.nn import Linear4bit
|
11 |
|
12 |
tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
|
13 |
-
model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
|
14 |
-
def quantize_model(model):
|
15 |
-
for name, module in model.named_modules():
|
16 |
-
# Quantize only Linear layers that haven't already been quantized
|
17 |
-
if isinstance(module, torch.nn.Linear) and not isinstance(module, Linear4bit):
|
18 |
-
module = quantize_blockwise(module)
|
19 |
|
20 |
-
# Quantize the model (modified)
|
21 |
-
quantize_model(model)
|
22 |
# alpaca_prompt = You MUST copy from above!
|
23 |
@spaces.GPU(duration=300)
|
24 |
def chunk_it(input_command, item_list):
|
|
|
6 |
import gradio as gr
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
|
|
9 |
|
10 |
tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final")
|
11 |
+
model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged4bit_clean_final", load_in_4bit=True, device_map="auto")
|
|
|
|
|
|
|
|
|
|
|
12 |
|
|
|
|
|
13 |
# alpaca_prompt = You MUST copy from above!
|
14 |
@spaces.GPU(duration=300)
|
15 |
def chunk_it(input_command, item_list):
|