FlawedLLM committed (verified)
Commit dcde33a · 1 Parent(s): 6875a6e

Update app.py

Files changed (1)
  1. app.py +16 -11
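In short: the commit comments out the earlier load path (base weights and LoRA adapter both pulled from FlawedLLM/BhashiniLLM, followed by a manual cast of every parameter to float16) and replaces it with a PeftConfig/PeftModel load that attaches the FlawedLLM/BhashiniLLM adapter to the pre-quantized unsloth/llama-3-8b-bnb-4bit base; the tokenizer load that was previously commented out near the top of the file is re-enabled.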
app.py CHANGED
@@ -3,10 +3,10 @@ import spaces
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel, PeftConfig
 
 
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
 # quantization_config = BitsAndBytesConfig(
 #     load_in_4bit=True,
 #     bnb_4bit_use_double_quant=True,
@@ -20,18 +20,23 @@ from peft import PeftModel
 #     use_safetensors=True,
 # )
 
-# Assuming you have your HF repository in this format: "your_username/your_model_name"
-model_id = "FlawedLLM/BhashiniLLM"
+# # Assuming you have your HF repository in this format: "your_username/your_model_name"
+# model_id = "FlawedLLM/BhashiniLLM"
 
-# Load the base model (the one you fine-tuned with LoRA)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
-for param in base_model.parameters():
-    param.data = param.data.to(torch.float16)  # or torch.float32
+# # Load the base model (the one you fine-tuned with LoRA)
+# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
+# for param in base_model.parameters():
+#     param.data = param.data.to(torch.float16)  # or torch.float32
 
-# Load the LoRA adapter weights
-model = PeftModel.from_pretrained(base_model, model_id)
+# # Load the LoRA adapter weights
+# model = PeftModel.from_pretrained(base_model, model_id)
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+config = PeftConfig.from_pretrained("FlawedLLM/BhashiniLLM")
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit", device_map='auto')
+model = PeftModel.from_pretrained(base_model, "FlawedLLM/BhashiniLLM")
 
 
 @spaces.GPU(duration=300)
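
Note: the diff is truncated at the @spaces.GPU(duration=300) decorator, so the handler it decorates is not shown. For context, below is a minimal sketch of how a ZeroGPU Space typically wires the loaded model and tokenizer into a Gradio handler, assuming the imports and the model/tokenizer defined above; the function name, prompt handling, and generation settings are illustrative assumptions, not the contents of this commit.

# Hypothetical handler for illustration only; not part of the commit shown above.
@spaces.GPU(duration=300)
def inference(prompt):
    # Move the encoded prompt to the model's device (the base was loaded with device_map='auto')
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(fn=inference, inputs="text", outputs="text")
demo.launch()

One note on the new load path: PeftConfig.from_pretrained is called but its result is only bound to config; the base checkpoint is hard-coded instead of being read from config.base_model_name_or_path, which is the usual way to keep the base model and adapter in sync.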