raccoote committed
Commit d745297 · verified · 1 Parent(s): 5d70b52

Update app.py

Files changed (1):
  app.py +9 -7
app.py CHANGED
@@ -3,29 +3,31 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig
 import torch
 from peft import PeftModel
 
-# Step 1: Load the base model's configuration
+# Define the base model and configuration
 base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+
+# Load the configuration
 config = LlamaConfig.from_pretrained(base_model_name)
 
-# Step 2: Simplify or remove the rope_scaling configuration
+# Simplify or adjust the configuration if necessary
 if hasattr(config, 'rope_scaling'):
     config.rope_scaling = {
         'type': 'linear',
         'factor': 8.0
     }
 
-# Step 3: Load the tokenizer and base model, ensuring no quantization is attempted
+# Load the tokenizer and base model without quantization settings
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Ensure that no quantization settings are passed
+# Ensure the model is loaded without quantization settings
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     config=config,
     torch_dtype=torch.float32,  # Use full precision
-    device_map={"": "cpu"}  # Ensure it's set to CPU if no GPU is available
+    device_map={"": "cpu"}  # Ensure model is loaded on CPU
 )
 
-# Step 4: Load the LoRA adapter from the "raccoote/angry-birds-v2" repository
+# Load the LoRA adapter from the repository
 adapter_model = PeftModel.from_pretrained(base_model, "raccoote/angry-birds-v2")
 
 def generate_text(prompt):
@@ -33,7 +35,7 @@ def generate_text(prompt):
     outputs = adapter_model.generate(**inputs, max_new_tokens=50)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Step 5: Create the Gradio interface
+# Create the Gradio interface
 iface = gr.Interface(fn=generate_text,
                      inputs="text",
                      outputs="text",
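
For context on the rope_scaling block that both versions keep: Llama 3.1 checkpoints publish an extended rope_scaling dict (rope_type "llama3" plus extra frequency fields) that older transformers releases reject at config-validation time, expecting only 'type' and 'factor'. A minimal sketch of the same normalization follows; the extra-key check is an assumption added here for safety, not code from app.py:

# Sketch: collapse Llama 3.1's extended rope_scaling to the legacy
# two-field shape that older transformers releases validate against.
from transformers import LlamaConfig

config = LlamaConfig.from_pretrained("unsloth/meta-llama-3.1-8b-bnb-4bit")
rope = getattr(config, "rope_scaling", None)
if isinstance(rope, dict) and set(rope) - {"type", "factor"}:
    # Keep only the scaling factor and fall back to linear scaling.
    config.rope_scaling = {"type": "linear", "factor": float(rope.get("factor", 8.0))}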
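
The diff elides the first line of generate_text's body, so the sketch below shows one way to smoke-test the loaded model and adapter outside Gradio; the tokenization line is an assumption about that elided step, and tokenizer, adapter_model, and iface refer to the objects defined in app.py. Serving the app afterwards uses Gradio's standard launch call.

# Hypothetical smoke test; the tokenization line is assumed, since the
# start of generate_text's body is not shown in the diff above.
import torch

def smoke_test(prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt")  # assumed tokenization step
    with torch.no_grad():
        outputs = adapter_model.generate(**inputs, max_new_tokens=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(smoke_test("Hello"))

# Serve the interface with the standard Gradio call.
iface.launch()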