raccoote committed
Commit 910ab21 · verified · 1 Parent(s): d745297

Update app.py

Files changed (1)
  1. app.py +10 -20
app.py CHANGED
@@ -1,34 +1,24 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig
-import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 
 # Define the base model and configuration
-base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+base_model_name = "raccoote/angry-birds-v2"
 
-# Load the configuration
-config = LlamaConfig.from_pretrained(base_model_name)
-
-# Simplify or adjust the configuration if necessary
-if hasattr(config, 'rope_scaling'):
-    config.rope_scaling = {
-        'type': 'linear',
-        'factor': 8.0
-    }
-
-# Load the tokenizer and base model without quantization settings
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Ensure the model is loaded without quantization settings
+# Load the model with 8-bit precision
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    config=config,
-    torch_dtype=torch.float32,  # Use full precision
-    device_map={"": "cpu"}  # Ensure model is loaded on CPU
+    quantization_config=quantization_config,
+    device_map="auto"  # This will ensure the model is distributed to available hardware
 )
 
 # Load the LoRA adapter from the repository
-adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
+adapter_model = PeftModel.from_pretrained(base_model, base_model_name)
 
 def generate_text(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
@@ -42,4 +32,4 @@ iface = gr.Interface(fn=generate_text,
     title="LLaMA 3.1 with LoRA Adapters",
     description="Enter a prompt and get the model's output.")
 
-iface.launch()
+iface.launch()
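
In summary, the commit replaces the CPU-only, full-precision load of the unsloth base checkpoint (and the manual rope_scaling override) with 8-bit loading via bitsandbytes and device_map="auto", and points both the base-model path and the adapter path at the raccoote/angry-birds-v2 repo. Below is a minimal sketch of exercising the new loading path outside Gradio; the prompt and max_new_tokens value are illustrative assumptions, not taken from app.py, and recent transformers releases can resolve the base weights from the adapter repo's adapter_config.json when peft is installed.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Same repo the commit uses for the tokenizer, base model, and adapter.
model_name = "raccoote/angry-birds-v2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# 8-bit loading via bitsandbytes, as introduced by this commit.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, model_name)

# Illustrative generation call; the prompt and max_new_tokens are assumed
# values for this sketch, not taken from app.py.
prompt = "Write a short story about angry birds."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))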