FlawedLLM committed (verified)
Commit dcde33a · 1 Parent(s): 6875a6e

Update app.py

Files changed (1)
  1. app.py +16 -11
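In short: the commit comments out the earlier load path (base weights and LoRA adapter both pulled from FlawedLLM/BhashiniLLM, followed by a manual cast of every parameter to float16) and replaces it with a PeftConfig/PeftModel load that attaches the FlawedLLM/BhashiniLLM adapter to the pre-quantized unsloth/llama-3-8b-bnb-4bit base; the tokenizer load that was previously commented out near the top of the file is re-enabled.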
app.py CHANGED
@@ -3,10 +3,10 @@ import spaces
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
-from peft import PeftModel
+from peft import PeftModel, PeftConfig
 
 
-# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
+tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/BhashiniLLM")
 # quantization_config = BitsAndBytesConfig(
 #     load_in_4bit=True,
 #     bnb_4bit_use_double_quant=True,
@@ -20,18 +20,23 @@ from peft import PeftModel
 #     use_safetensors=True,
 # )
 
-# Assuming you have your HF repository in this format: "your_username/your_model_name"
-model_id = "FlawedLLM/BhashiniLLM"
+# # Assuming you have your HF repository in this format: "your_username/your_model_name"
+# model_id = "FlawedLLM/BhashiniLLM"
 
-# Load the base model (the one you fine-tuned with LoRA)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
-for param in base_model.parameters():
-    param.data = param.data.to(torch.float16)  # or torch.float32
+# # Load the base model (the one you fine-tuned with LoRA)
+# base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')  # Load in 8-bit for efficiency
+# for param in base_model.parameters():
+#     param.data = param.data.to(torch.float16)  # or torch.float32
 
-# Load the LoRA adapter weights
-model = PeftModel.from_pretrained(base_model, model_id)
+# # Load the LoRA adapter weights
+# model = PeftModel.from_pretrained(base_model, model_id)
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+config = PeftConfig.from_pretrained("FlawedLLM/BhashiniLLM")
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit", device_map='auto')
+model = PeftModel.from_pretrained(base_model, "FlawedLLM/BhashiniLLM")
 
 
 @spaces.GPU(duration=300)
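
Note: the diff is truncated at the @spaces.GPU(duration=300) decorator, so the handler it decorates is not shown. For context, below is a minimal sketch of how a ZeroGPU Space typically wires the loaded model and tokenizer into a Gradio handler, assuming the imports and the model/tokenizer defined above; the function name, prompt handling, and generation settings are illustrative assumptions, not the contents of this commit.

# Hypothetical handler for illustration only; not part of the commit shown above.
@spaces.GPU(duration=300)
def inference(prompt):
    # Move the encoded prompt to the model's device (the base was loaded with device_map='auto')
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(fn=inference, inputs="text", outputs="text")
demo.launch()

One note on the new load path: PeftConfig.from_pretrained is called but its result is only bound to config; the base checkpoint is hard-coded instead of being read from config.base_model_name_or_path, which is the usual way to keep the base model and adapter in sync.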