FlawedLLM
committed on
Update app.py
app.py
CHANGED
@@ -3,10 +3,10 @@ import spaces
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel, PeftConfig
 
 
-
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
 # quantization_config = BitsAndBytesConfig(
 #     load_in_4bit=True,
 #     bnb_4bit_use_double_quant=True,
@@ -20,18 +20,23 @@ from peft import PeftModel
 #     use_safetensors=True,
 # )
 
-# Assuming you have your HF repository in this format: "your_username/your_model_name"
-model_id = "FlawedLLM/BhashiniLLM"
+# # Assuming you have your HF repository in this format: "your_username/your_model_name"
+# model_id = "FlawedLLM/BhashiniLLM"
 
-# Load the base model (the one you fine-tuned with LoRA)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto') # Load in 8-bit for efficiency
-for param in base_model.parameters():
-    param.data = param.data.to(torch.float16) # or torch.float32
+# # Load the base model (the one you fine-tuned with LoRA)
+# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto') # Load in 8-bit for efficiency
+# for param in base_model.parameters():
+#     param.data = param.data.to(torch.float16) # or torch.float32
 
-# Load the LoRA adapter weights
-model = PeftModel.from_pretrained(base_model, model_id)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+# # Load the LoRA adapter weights
+# model = PeftModel.from_pretrained(base_model, model_id)
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+config = PeftConfig.from_pretrained("FlawedLLM/BhashiniLLM")
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit", device_map='auto')
+model = PeftModel.from_pretrained(base_model, "FlawedLLM/BhashiniLLM")
 
 
 @spaces.GPU(duration=300)
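In short, the commit moves tokenizer loading to import time, pulls in PeftConfig, and replaces the old full-precision load of FlawedLLM/BhashiniLLM with the 4-bit quantized base unsloth/llama-3-8b-bnb-4bit plus the BhashiniLLM LoRA adapter attached via PeftModel. A minimal sketch of how the @spaces.GPU-decorated handler could consume these module-level objects follows; the function name, prompt handling, generation parameters, and Gradio wiring are assumptions for illustration, not part of this commit.

# Sketch only: uses the module-level `model` and `tokenizer` defined above.
# Function name, prompt format, and generation settings are assumed, not
# taken from this commit's app.py.
@spaces.GPU(duration=300)
def generate_text(prompt):
    # Move the tokenized prompt to the device the sharded model expects
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,   # assumed cap; the Space may use another limit
            do_sample=True,
            temperature=0.7,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Assumed Gradio wiring; the actual interface in app.py may differ.
demo = gr.Interface(fn=generate_text, inputs="text", outputs="text")
demo.launch()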