raccoote committed on
Commit
5d70b52
·
verified ·
1 Parent(s): e149a04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -7,24 +7,26 @@ from peft import PeftModel
7
  base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
8
  config = LlamaConfig.from_pretrained(base_model_name)
9
 
10
- # Step 2: Simplify the rope_scaling configuration
11
  if hasattr(config, 'rope_scaling'):
12
  config.rope_scaling = {
13
- 'type': 'dynamic', # Example: 'linear' or 'dynamic'
14
- 'factor': 8.0 # Adjust this value based on your needs
15
  }
16
 
 
17
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
 
18
  base_model = AutoModelForCausalLM.from_pretrained(
19
  base_model_name,
20
  config=config,
21
- torch_dtype=torch.float32, # Ensure it loads in full precision
22
- device_map="auto" # This ensures it loads correctly on CPU if GPU is not available
23
  )
24
 
25
- # Step 4: Load the LoRA adapter from the local files or Hugging Face repository
26
- adapter_path = "raccoote/angry-birds-v2" # Path to your local adapter or the repository name
27
- adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
28
 
29
  def generate_text(prompt):
30
  inputs = tokenizer(prompt, return_tensors="pt")
 
7
base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
config = LlamaConfig.from_pretrained(base_model_name)

# Step 2: Normalize the rope_scaling configuration.
# Llama-3.1 checkpoints ship an extended rope_scaling dict (rope_type='llama3',
# low_freq_factor, high_freq_factor, ...) that older transformers releases
# reject, expecting exactly {'type', 'factor'}. Collapse it to the legacy
# two-key form, keeping the checkpoint's own scaling factor when present
# (Llama-3.1 uses 8.0, so the fallback matches the original behavior).
# Guard on truthiness, not hasattr: a bare hasattr check would force linear
# scaling even when rope_scaling is None (i.e. the model uses no scaling).
# NOTE(review): on a current transformers this override degrades long-context
# behavior -- prefer upgrading transformers and deleting this workaround.
if getattr(config, "rope_scaling", None):
    config.rope_scaling = {
        'type': 'linear',
        'factor': float(config.rope_scaling.get('factor', 8.0)),
    }
16
 
17
# Step 3: Load the tokenizer and base model, ensuring no quantization is
# attempted (the checkpoint name says bnb-4bit, but we load it unquantized).
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Pick the target device once. The original hard-coded device_map={"": "cpu"}
# while its comment claimed "CPU if no GPU is available" -- make the code
# match the stated intent: use CUDA (in float16, so the 8B model fits in GPU
# memory) when available, otherwise fall back to full-precision CPU.
if torch.cuda.is_available():
    _device_map = {"": "cuda:0"}
    _torch_dtype = torch.float16
else:
    _device_map = {"": "cpu"}
    _torch_dtype = torch.float32  # full precision on CPU, as before

# Ensure that no quantization settings are passed.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    config=config,
    torch_dtype=_torch_dtype,
    device_map=_device_map,
)

# Step 4: Attach the LoRA adapter from the "raccoote/angry-birds-v2" repo.
adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
 
30
 
31
  def generate_text(prompt):
32
  inputs = tokenizer(prompt, return_tensors="pt")