EmTpro01 committed
Commit af5f253 · verified · 1 Parent(s): 2d7b45a

Update app.py

Files changed (1)
  1. app.py +28 -145
app.py CHANGED
@@ -1,148 +1,31 @@
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from peft import PeftModel
-import logging
-import os
-from huggingface_hub import snapshot_download
 
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-def download_lora_weights():
-    """Download LoRA weights from Hugging Face"""
-    return snapshot_download(
-        repo_id="EmTpro01/Llama-3.2-3B-peft",
-        allow_patterns=["adapter_config.json", "adapter_model.bin"],
-    )
-
-def load_model_with_lora():
-    """
-    Load Llama model and merge it with LoRA adapter
-    """
-    try:
-        # Configure quantization
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_compute_dtype=torch.float16
-        )
-
-        # Load base model
-        base_model = AutoModelForCausalLM.from_pretrained(
-            "unsloth/llama-3.2-3b-bnb-4bit",
-            quantization_config=bnb_config,
-            device_map="auto",
-            trust_remote_code=True
-        )
-        logger.info("Successfully loaded base model")
-
-        # Download and load LoRA adapter
-        lora_path = download_lora_weights()
-        logger.info(f"Downloaded LoRA weights to: {lora_path}")
-
-        # Load and merge LoRA adapter
-        model = PeftModel.from_pretrained(base_model, lora_path)
-        logger.info("Successfully loaded LoRA adapter")
-
-        # For inference, we can merge the LoRA weights with the base model
-        model = model.merge_and_unload()
-        logger.info("Successfully merged LoRA weights with base model")
-
-        return model
-
-    except Exception as e:
-        logger.error(f"Error loading model: {str(e)}")
-        raise RuntimeError(f"Failed to load model: {str(e)}")
-
-def load_tokenizer():
-    """
-    Load tokenizer for the Llama model
-    """
-    try:
-        tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3.2-3b-bnb-4bit")
-        logger.info("Successfully loaded tokenizer")
-        return tokenizer
-    except Exception as e:
-        logger.error(f"Error loading tokenizer: {str(e)}")
-        raise RuntimeError(f"Failed to load tokenizer: {str(e)}")
-
-def generate_code(prompt, model, tokenizer, max_length=512, temperature=0.7):
-    """
-    Generate code based on the prompt
-    """
-    try:
-        # Add any specific prompt template if needed
-        formatted_prompt = f"### Instruction: Write code for the following task:\n{prompt}\n\n### Response:"
-
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
-
-        outputs = model.generate(
-            **inputs,
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=True,
-            top_p=0.95,
-            top_k=50,
-            repetition_penalty=1.1,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract only the response part
-        response = generated_text.split("### Response:")[-1].strip()
-        return response
-    except Exception as e:
-        logger.error(f"Error during code generation: {str(e)}")
-        return f"Error generating code: {str(e)}"
-
-# Initialize model and tokenizer
-logger.info("Starting model initialization...")
-model = load_model_with_lora()
-tokenizer = load_tokenizer()
-logger.info("Model initialization completed successfully")
-
-# Create Gradio interface with error handling
-def gradio_generate(prompt, temperature, max_length):
-    try:
-        return generate_code(prompt, model, tokenizer, max_length, temperature)
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-# Create the Gradio interface
-demo = gr.Interface(
-    fn=gradio_generate,
-    inputs=[
-        gr.Textbox(
-            lines=5,
-            placeholder="Enter your code generation prompt here...",
-            label="Prompt"
-        ),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.7,
-            step=0.1,
-            label="Temperature"
-        ),
-        gr.Slider(
-            minimum=64,
-            maximum=2048,
-            value=512,
-            step=64,
-            label="Max Length"
-        )
-    ],
-    outputs=gr.Code(label="Generated Code"),
-    title="Llama Code Generation with LoRA",
-    description="Enter a prompt to generate code using Llama 3.2 3B model fine-tuned with LoRA",
-    examples=[
-        ["Write a Python function to sort a list of numbers in ascending order"],
-        ["Create a simple REST API using FastAPI that handles GET and POST requests"],
-        ["Write a function to check if a string is a palindrome"]
-    ]
-)
-
-if __name__ == "__main__":
-    demo.launch()
+# Load the fine-tuned model and tokenizer
+model_name = "EmTpro01/Llama-3.2-3B-fine-tuned"  # Replace with your Hugging Face model name
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Define the prediction function
+def generate_code(prompt):
+    # Tokenize the input
+    inputs = tokenizer(prompt, return_tensors="pt")
+    # Generate code
+    outputs = model.generate(inputs["input_ids"], max_length=200, num_return_sequences=1)
+    # Decode the output
+    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return generated_code
+
+# Set up Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## Code Generation with Fine-Tuned Llama Model")
+    with gr.Row():
+        prompt = gr.Textbox(label="Input Prompt", placeholder="Enter a prompt for code generation...")
+        output = gr.Textbox(label="Generated Code")
+    generate_button = gr.Button("Generate Code")
+
+    generate_button.click(generate_code, inputs=prompt, outputs=output)
+
+# Launch the interface
+demo.launch()
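
Once this simplified app is running as a Space, the generate_code click handler can also be exercised remotely. The snippet below is a minimal sketch using gradio_client, not part of the commit: the Space id "EmTpro01/your-space-name" is a placeholder, and the "/generate_code" endpoint name is assumed from Gradio's default of naming endpoints after the handler function (view_api() prints the actual names).

from gradio_client import Client

client = Client("EmTpro01/your-space-name")  # placeholder Space id, not taken from the commit
client.view_api()  # prints the exposed endpoints so the api_name below can be confirmed
result = client.predict(
    "Write a function to check if a string is a palindrome",
    api_name="/generate_code",  # assumed default name derived from the click handler
)
print(result)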