raccoote committed
Commit 910ab21 · verified · 1 Parent(s): d745297

Update app.py

Files changed (1)
  1. app.py +10 -20
app.py CHANGED
@@ -1,34 +1,24 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig
-import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 
 # Define the base model and configuration
-base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+base_model_name = "raccoote/angry-birds-v2"
 
-# Load the configuration
-config = LlamaConfig.from_pretrained(base_model_name)
-
-# Simplify or adjust the configuration if necessary
-if hasattr(config, 'rope_scaling'):
-    config.rope_scaling = {
-        'type': 'linear',
-        'factor': 8.0
-    }
-
-# Load the tokenizer and base model without quantization settings
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Ensure the model is loaded without quantization settings
+# Load the model with 8-bit precision
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    config=config,
-    torch_dtype=torch.float32,  # Use full precision
-    device_map={"": "cpu"}  # Ensure model is loaded on CPU
+    quantization_config=quantization_config,
+    device_map="auto"  # This will ensure the model is distributed to available hardware
 )
 
 # Load the LoRA adapter from the repository
-adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
+adapter_model = PeftModel.from_pretrained(base_model, base_model_name)
 
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
@@ -42,4 +32,4 @@ iface = gr.Interface(fn=generate_text,
     title="LLaMA 3.1 with LoRA Adapters",
     description="Enter a prompt and get the model's output.")
 
-iface.launch()
+iface.launch()
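
In summary, the commit replaces the CPU-only, full-precision load of the unsloth base checkpoint (and the manual rope_scaling override) with 8-bit loading via bitsandbytes and device_map="auto", and points both the base-model path and the adapter path at the raccoote/angry-birds-v2 repo. Below is a minimal sketch of exercising the new loading path outside Gradio; the prompt and max_new_tokens value are illustrative assumptions, not taken from app.py, and recent transformers releases can resolve the base weights from the adapter repo's adapter_config.json when peft is installed.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Same repo the commit uses for the tokenizer, base model, and adapter.
model_name = "raccoote/angry-birds-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# 8-bit loading via bitsandbytes, as introduced by this commit.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, model_name)

# Illustrative generation call; the prompt and max_new_tokens are assumed
# values for this sketch, not taken from app.py.
prompt = "Write a short story about angry birds."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))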