JamieAi33 committed on
Commit
f73fe76
·
1 Parent(s): a7b05ad

Add PEFT LoRA support

Browse files
Files changed (2) hide show
  1. app.py +25 -22
  2. requirements.txt +3 -2
app.py CHANGED
@@ -1,32 +1,35 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
3
 
4
- # Load the PEFT model and tokenizer from Hugging Face Hub
5
- model_name = "JamieAi33/Phi-2_PEFT"
6
- model = AutoModelForCausalLM.from_pretrained(model_name)
7
- tokenizer = AutoTokenizer.from_pretrained(model_name)
8
 
9
- # Define the prediction function
10
- def generate_text(prompt, max_length=100):
11
- inputs = tokenizer(prompt, return_tensors="pt")
12
- outputs = model.generate(**inputs, max_new_tokens=max_length)
 
 
 
 
 
 
 
 
 
13
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
14
 
15
- # Create the Gradio interface
16
  with gr.Blocks() as demo:
17
- gr.Markdown("# PEFT LLM Demo")
18
- gr.Markdown("Generate text using the Phi-2 PEFT model.")
19
  with gr.Row():
20
- prompt_input = gr.Textbox(label="Input Prompt", placeholder="Enter a prompt here...")
21
- max_tokens_input = gr.Slider(label="Max Tokens", minimum=10, maximum=200, value=100, step=10)
22
- generate_button = gr.Button("Generate")
23
- output_text = gr.Textbox(label="Generated Text", placeholder="Generated text will appear here.")
24
 
25
- generate_button.click(
26
- fn=generate_text,
27
- inputs=[prompt_input, max_tokens_input],
28
- outputs=output_text
29
- )
30
 
31
- # Launch the app
32
- demo.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from peft import PeftModel
4
 
5
+ # Define model details
6
+ base_model_name = "microsoft/phi-2"
7
+ adapter_name = "JamieAi33/Phi-2-QLora"
 
8
 
9
+ # Load base model
10
+ print("Loading base model...")
11
+ base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto")
12
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name)
13
+
14
+ # Apply LoRA adapter
15
+ print("Loading LoRA adapter...")
16
+ model = PeftModel.from_pretrained(base_model, adapter_name)
17
+
18
+ # Function to generate text
19
+ def generate_text(prompt, max_tokens):
20
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
21
+ outputs = model.generate(**inputs, max_new_tokens=max_tokens)
22
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
23
 
24
+ # Gradio UI
25
  with gr.Blocks() as demo:
26
+ gr.Markdown("# PEFT LoRA Model")
 
27
  with gr.Row():
28
+ prompt = gr.Textbox(label="Prompt", lines=4)
29
+ max_tokens = gr.Slider(label="Max Tokens", minimum=10, maximum=200, value=50)
30
+ output = gr.Textbox(label="Generated Text", lines=6)
 
31
 
32
+ generate_button = gr.Button("Generate")
33
+ generate_button.click(generate_text, inputs=[prompt, max_tokens], outputs=output)
 
 
 
34
 
35
+ demo.launch()
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- gradio
2
- transformers
3
  torch
 
 
 
 
 
 
1
  torch
2
+ transformers
3
+ peft
4
+ gradio