Update app.py
app.py CHANGED
@@ -1,34 +1,24 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer,
-import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 
 # Define the base model and configuration
-base_model_name = "
+base_model_name = "raccoote/angry-birds-v2"
 
-# Load the
-config = LlamaConfig.from_pretrained(base_model_name)
-
-# Simplify or adjust the configuration if necessary
-if hasattr(config, 'rope_scaling'):
-    config.rope_scaling = {
-        'type': 'linear',
-        'factor': 8.0
-    }
-
-# Load the tokenizer and base model without quantization settings
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-#
+# Load the model with 8-bit precision
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-
-
-    device_map={"": "cpu"}  # Ensure model is loaded on CPU
+    quantization_config=quantization_config,
+    device_map="auto"  # This will ensure the model is distributed to available hardware
 )
 
 # Load the LoRA adapter from the repository
-adapter_model = PeftModel.from_pretrained(base_model,
+adapter_model = PeftModel.from_pretrained(base_model, base_model_name)
 
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
@@ -42,4 +32,4 @@ iface = gr.Interface(fn=generate_text,
     title="LLaMA 3.1 with LoRA Adapters",
     description="Enter a prompt and get the model's output.")
 
-iface.launch()
+iface.launch()
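For orientation, here is a minimal sketch of what the complete updated app.py would look like after this commit. The hunks above elide the body of generate_text and the gr.Interface(...) call, so the generation and decode steps below (the .to(...) device move, the max_new_tokens cap, and the decode call) are assumptions rather than the commit's actual code:

```python
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Define the base model and configuration
base_model_name = "raccoote/angry-birds-v2"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the model with 8-bit precision (requires the bitsandbytes package)
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quantization_config,
    device_map="auto",  # distribute the model across available hardware
)

# Load the LoRA adapter from the same repository
adapter_model = PeftModel.from_pretrained(base_model, base_model_name)

def generate_text(prompt):
    # Tokenize on the model's device; the generation settings here are
    # assumptions, not values taken from the commit.
    inputs = tokenizer(prompt, return_tensors="pt").to(adapter_model.device)
    outputs = adapter_model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

iface = gr.Interface(fn=generate_text,
                     inputs="text",
                     outputs="text",
                     title="LLaMA 3.1 with LoRA Adapters",
                     description="Enter a prompt and get the model's output.")

iface.launch()
```

One caveat on the design choice: load_in_8bit depends on bitsandbytes, which generally requires a CUDA GPU, so on a CPU-only Space this load path can itself fail at startup even though it fixes the earlier config-handling errors.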