raccoote committed
Commit d745297 · verified · 1 Parent(s): 5d70b52

Update app.py

Files changed (1):
  app.py +9 -7
app.py CHANGED
@@ -3,29 +3,31 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig
 import torch
 from peft import PeftModel
 
-# Step 1: Load the base model's configuration
+# Define the base model and configuration
 base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+
+# Load the configuration
 config = LlamaConfig.from_pretrained(base_model_name)
 
-# Step 2: Simplify or remove the rope_scaling configuration
+# Simplify or adjust the configuration if necessary
 if hasattr(config, 'rope_scaling'):
     config.rope_scaling = {
         'type': 'linear',
         'factor': 8.0
     }
 
-# Step 3: Load the tokenizer and base model, ensuring no quantization is attempted
+# Load the tokenizer and base model without quantization settings
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Ensure that no quantization settings are passed
+# Ensure the model is loaded without quantization settings
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     config=config,
     torch_dtype=torch.float32,  # Use full precision
-    device_map={"": "cpu"}  # Ensure it's set to CPU if no GPU is available
+    device_map={"": "cpu"}  # Ensure model is loaded on CPU
 )
 
-# Step 4: Load the LoRA adapter from the "raccoote/angry-birds-v2" repository
+# Load the LoRA adapter from the repository
 adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
 
 def generate_text(prompt):
@@ -33,7 +35,7 @@ def generate_text(prompt):
     outputs = adapter_model.generate(**inputs, max_new_tokens=50)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Step 5: Create the Gradio interface
+# Create the Gradio interface
 iface = gr.Interface(fn=generate_text,
                      inputs="text",
                      outputs="text",
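
For context on the rope_scaling block that both versions keep: Llama 3.1 checkpoints publish an extended rope_scaling dict (rope_type "llama3" plus extra frequency fields) that older transformers releases reject at config-validation time, expecting only 'type' and 'factor'. A minimal sketch of the same normalization follows; the extra-key check is an assumption added here for safety, not code from app.py:

# Sketch: collapse Llama 3.1's extended rope_scaling to the legacy
# two-field shape that older transformers releases validate against.
from transformers import LlamaConfig

config = LlamaConfig.from_pretrained("unsloth/meta-llama-3.1-8b-bnb-4bit")
rope = getattr(config, "rope_scaling", None)
if isinstance(rope, dict) and set(rope) - {"type", "factor"}:
    # Keep only the scaling factor and fall back to linear scaling.
    config.rope_scaling = {"type": "linear", "factor": float(rope.get("factor", 8.0))}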
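
The diff elides the first line of generate_text's body, so the sketch below shows one way to smoke-test the loaded model and adapter outside Gradio; the tokenization line is an assumption about that elided step, and tokenizer, adapter_model, and iface refer to the objects defined in app.py. Serving the app afterwards uses Gradio's standard launch call.

# Hypothetical smoke test; the tokenization line is assumed, since the
# start of generate_text's body is not shown in the diff above.
import torch

def smoke_test(prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt")  # assumed tokenization step
    with torch.no_grad():
        outputs = adapter_model.generate(**inputs, max_new_tokens=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(smoke_test("Hello"))

# Serve the interface with the standard Gradio call.
iface.launch()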