daresearch committed
Commit 1fddc84 · verified · 1 Parent(s): aef46fe

Update app.py

Files changed (1)
  1. app.py +12 -9
app.py CHANGED

@@ -3,34 +3,37 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
 
-# Step 1: Define the base model and LoRA adapter
-base_model_name = "meta-llama/Llama-3.3-70B-Instruct"  # Replace with correct model name
+# Step 1: Load base model
+base_model_name = "meta-llama/Llama-3.3-70B-Instruct"
 adapter_repo = "daresearch/Llama-3.3-70B-ft-exec-roles"
 
-# Step 2: Load the base model
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     device_map="auto",
-    torch_dtype=torch.float16,  # Use FP16 precision
+    torch_dtype=torch.float16,
 )
 
-# Step 3: Load the LoRA adapter
+# Step 2: Load LoRA adapter
 model_with_adapter = PeftModel.from_pretrained(
     base_model,
     adapter_repo,
     device_map="auto",
 )
+print(f"Loaded LoRA adapter from {adapter_repo}")
 
-# Step 4: Load the tokenizer
+# Verify adapter configuration
+print(model_with_adapter.config)
+
+# Step 3: Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Step 5: Define the inference function
+# Step 4: Define inference function
 def generate_text(prompt, max_length=1024):
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to("cuda")
     outputs = model_with_adapter.generate(**inputs, max_length=max_length)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-# Step 6: Create the Gradio interface
+# Step 5: Create Gradio interface
 iface = gr.Interface(
     fn=generate_text,
     inputs=[
@@ -42,6 +45,6 @@ iface = gr.Interface(
     description="Generate text using a LLaMA model with LoRA adapters."
 )
 
-# Step 7: Launch the Gradio app
+# Step 6: Launch Gradio app
 if __name__ == "__main__":
     iface.launch()
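
For readers who want to exercise the updated script, a minimal local smoke test might look like the sketch below. It is illustrative only and not part of this commit: it assumes `import torch` appears near the top of app.py (outside the lines shown in this diff), that a CUDA device with enough memory for the FP16 70B base model is available, and the sample prompt and the `from app import ...` usage are placeholders for illustration.

# Illustrative smoke test for the updated app.py (not part of this commit).
# Importing app triggers the model and adapter loading at module import time;
# the `if __name__ == "__main__"` guard keeps iface.launch() from running here.
from app import generate_text, iface  # hypothetical: run from the same directory as app.py

sample_prompt = "Describe the responsibilities of a Chief Operating Officer."  # placeholder prompt
print(generate_text(sample_prompt, max_length=256))

# Optionally expose the Gradio UI on a temporary public URL for quick sharing.
iface.launch(share=True)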