Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -7,24 +7,26 @@ from peft import PeftModel
 base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
 config = LlamaConfig.from_pretrained(base_model_name)
 
-# Step 2: Simplify the rope_scaling configuration
+# Step 2: Simplify or remove the rope_scaling configuration
 if hasattr(config, 'rope_scaling'):
     config.rope_scaling = {
-        'type': '
-        'factor': 8.0
+        'type': 'linear',
+        'factor': 8.0
     }
 
+# Step 3: Load the tokenizer and base model, ensuring no quantization is attempted
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+# Ensure that no quantization settings are passed
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     config=config,
-    torch_dtype=torch.float32, #
-    device_map="
+    torch_dtype=torch.float32, # Use full precision
+    device_map={"": "cpu"} # Ensure it's set to CPU if no GPU is available
 )
 
-# Step 4: Load the LoRA adapter from the
-
-adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
+# Step 4: Load the LoRA adapter from the "raccoote/angry-birds-v2" repository
+adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
 
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
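
For context, the new side of the diff assembles into roughly the following app.py fragment. This is a sketch, not the committed file: the imports are inferred from the hunk header and the calls visible in the diff, and everything in generate_text past the tokenizer call is a hypothetical completion (the diff ends there), using the standard generate/decode calls from transformers and peft.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig
from peft import PeftModel

base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
config = LlamaConfig.from_pretrained(base_model_name)

# Step 2: Simplify or remove the rope_scaling configuration
# (Llama 3.1 checkpoints ship extra rope_scaling keys that older
# transformers versions reject; this commit collapses the dict to
# the two fields those versions validate.)
if hasattr(config, 'rope_scaling'):
    config.rope_scaling = {
        'type': 'linear',
        'factor': 8.0
    }

# Step 3: Load the tokenizer and base model, ensuring no quantization is attempted
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Ensure that no quantization settings are passed
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    config=config,
    torch_dtype=torch.float32,  # Use full precision
    device_map={"": "cpu"}      # Ensure it's set to CPU if no GPU is available
)

# Step 4: Load the LoRA adapter from the "raccoote/angry-birds-v2" repository
adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")

def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    # --- hypothetical completion below this line; not part of the commit ---
    outputs = adapter_model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

One design note on the committed change: device_map={"": "cpu"} uses the accelerate convention where the empty-string key maps the entire module tree to one device, so the whole model is placed on CPU rather than sharded.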
|