EmTpro01 committed
Commit 8eb4dc2 · verified · 1 Parent(s): be0a6a9

Update app.py

Files changed (1)
  1. app.py +85 -0
app.py CHANGED
@@ -0,0 +1,85 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel, PeftConfig
+
+ def load_model_with_lora(base_model_name, lora_path):
+     """
+     Load base model and merge it with LoRA adapter
+     """
+     # Load base model
+     base_model = AutoModelForCausalLM.from_pretrained(
+         base_model_name,
+         torch_dtype=torch.float16,
+         device_map="auto"
+     )
+
+     # Load and merge LoRA adapter
+     model = PeftModel.from_pretrained(base_model, lora_path)
+     model = model.merge_and_unload()  # Merge adapter weights with base model
+
+     return model
+
+ def load_tokenizer(base_model_name):
+     """
+     Load tokenizer for the base model
+     """
+     return AutoTokenizer.from_pretrained(base_model_name)
+
+ def generate_code(prompt, model, tokenizer, max_length=512, temperature=0.7):
+     """
+     Generate code based on the prompt
+     """
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     outputs = model.generate(
+         **inputs,
+         max_length=max_length,
+         temperature=temperature,
+         do_sample=True,
+         pad_token_id=tokenizer.eos_token_id
+     )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # Initialize model and tokenizer
+ BASE_MODEL_NAME = "unsloth/Llama-3.2-3B-bnb-4bit"  # Replace with your base model name
+ LORA_PATH = "EmTpro01/Llama-3.2-3B-peft"  # Replace with your LoRA adapter path
+
+ model = load_model_with_lora(BASE_MODEL_NAME, LORA_PATH)
+ tokenizer = load_tokenizer(BASE_MODEL_NAME)
+
+ # Create Gradio interface
+ def gradio_generate(prompt, temperature, max_length):
+     return generate_code(prompt, model, tokenizer, max_length, temperature)
+
+ demo = gr.Interface(
+     fn=gradio_generate,
+     inputs=[
+         gr.Textbox(
+             lines=5,
+             placeholder="Enter your code generation prompt here...",
+             label="Prompt"
+         ),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature"
+         ),
+         gr.Slider(
+             minimum=64,
+             maximum=2048,
+             value=512,
+             step=64,
+             label="Max Length"
+         )
+     ],
+     outputs=gr.Code(language="python", label="Generated Code"),
+     title="Code Generation with LoRA",
+     description="Enter a prompt to generate code using a fine-tuned model with LoRA adapters",
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
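
Once launched, the interface can also be queried programmatically. Below is a minimal sketch using gradio_client (installed separately via `pip install gradio_client`), assuming the app is deployed as a Hugging Face Space; the Space ID and the example prompt are hypothetical placeholders, and "/predict" is the default endpoint name a gr.Interface exposes.

# Minimal client sketch; the Space ID below is a hypothetical placeholder.
from gradio_client import Client

client = Client("EmTpro01/code-generation-demo")  # hypothetical Space ID
result = client.predict(
    "Write a Python function that reverses a string",  # Prompt
    0.7,   # Temperature
    512,   # Max Length
    api_name="/predict"
)
print(result)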