DarwinAnim8or committed on
Commit
e2c3a04
·
verified ·
1 Parent(s): 99eb889

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +299 -0
app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import requests
5
+ import io
6
+ from PIL import Image
7
+ import re
8
+ import json
9
+ import xml.etree.ElementTree as ET
10
+
11
class SmolLMWithTools:
    """SmolLM3 chat wrapper that can call a FLUX image-generation tool.

    The instance owns the tokenizer/model pair, the Hugging Face token used
    for the hosted FLUX endpoint, and the tool schema handed to the chat
    template. ``process_conversation`` is the entry point used by the UI.
    """

    # Seconds to wait for the hosted FLUX endpoint before giving up, so a
    # stalled request cannot block the UI handler forever.
    REQUEST_TIMEOUT = 120

    # XML-style call carrying exactly one parameter:
    # <tool_call><invoke name="..."><parameter name="...">v</parameter></invoke></tool_call>
    _XML_TOOL_CALL = re.compile(
        r'<tool_call>\s*<invoke name="([^"]+)">\s*<parameter name="([^"]+)">([^<]+)</parameter>\s*</invoke>\s*</tool_call>',
        re.DOTALL,
    )
    # Any <tool_call>...</tool_call> block; used to pick up JSON payloads.
    _TOOL_CALL_BLOCK = re.compile(r'<tool_call>(.*?)</tool_call>', re.DOTALL)

    def __init__(self):
        """Load the SmolLM3 tokenizer and model and register the tool schema."""
        # Initialize SmolLM3
        self.checkpoint = "HuggingFaceTB/SmolLM3-3B"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading SmolLM3 on {self.device}...")

        self.tokenizer = AutoTokenizer.from_pretrained(self.checkpoint)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.checkpoint,
            # Half precision on GPU, full precision on CPU.
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
        ).to(self.device)

        # HF API setup for FLUX
        self.hf_token = None
        self.flux_api_url = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell"

        # Define available tools
        self.tools = [
            {
                "name": "generate_image",
                "description": "Generate an image using AI based on a text description. Use this when the user asks for images, pictures, drawings, or visual content.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "prompt": {
                            "type": "string",
                            "description": "A detailed description of the image to generate. Be specific and descriptive.",
                        }
                    },
                    "required": ["prompt"],
                },
            }
        ]

        print("Model loaded successfully!")

    def set_hf_token(self, token):
        """Store the Hugging Face API token and return a status message.

        Surrounding whitespace (common when pasting) is stripped so it does
        not end up inside the ``Authorization`` header.
        """
        self.hf_token = token.strip() if isinstance(token, str) else token
        return "✅ HF Token set successfully!"

    def generate_image_tool(self, prompt):
        """Generate an image with the hosted FLUX endpoint.

        Args:
            prompt: Text description of the image.

        Returns:
            A dict with ``success`` and ``image`` keys, plus ``message`` on
            success or ``error`` on failure. ``image`` is a PIL image on
            success and ``None`` otherwise. Failures are reported through
            the dict rather than raised.
        """
        if not self.hf_token:
            return {"success": False, "error": "HF token not set", "image": None}
        if not isinstance(prompt, str) or not prompt.strip():
            # Nothing meaningful to send; avoid a pointless API round trip.
            return {"success": False, "error": "Empty image prompt", "image": None}

        headers = {"Authorization": f"Bearer {self.hf_token}"}
        data = {"inputs": prompt}

        try:
            response = requests.post(
                self.flux_api_url,
                headers=headers,
                json=data,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 200:
                image = Image.open(io.BytesIO(response.content))
                return {"success": True, "message": f"Successfully generated image: {prompt}", "image": image}
            if response.status_code == 503:
                return {"success": False, "error": "Model is loading, please try again", "image": None}
            return {"success": False, "error": f"API error: {response.status_code}", "image": None}

        except Exception as e:
            # Tool boundary: network and image-decoding failures are reported
            # to the caller as a failed result instead of crashing the chat.
            return {"success": False, "error": str(e), "image": None}

    @staticmethod
    def _decode_json_tool_call(payload):
        """Turn a JSON tool-call payload into a call dict, or ``None`` if invalid."""
        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            return None
        if not isinstance(data, dict) or not isinstance(data.get("name"), str):
            return None

        arguments = data.get("arguments", data.get("parameters", {}))
        if isinstance(arguments, str):
            # Some generations serialise the arguments object a second time.
            try:
                arguments = json.loads(arguments)
            except json.JSONDecodeError:
                return None
        if not isinstance(arguments, dict):
            return None
        return {"name": data["name"], "parameters": arguments}

    def parse_tool_calls(self, text):
        """Extract tool calls from model output.

        Two payload styles inside ``<tool_call>...</tool_call>`` are accepted:

        * XML: ``<invoke name="tool"><parameter name="p">value</parameter></invoke>``
          (single parameter, value stripped of surrounding whitespace).
        * JSON: ``{"name": "tool", "arguments": {...}}``. NOTE(review): this
          is presumably the form the SmolLM3 chat template asks the model to
          emit; confirm against the model card.

        Returns:
            A list of ``{"name": str, "parameters": dict}`` in the order the
            calls appear in ``text``. Malformed blocks are skipped.
        """
        if not text:
            return []

        located = []  # (start offset, call) so both styles keep text order

        for match in self._XML_TOOL_CALL.finditer(text):
            tool_name, param_name, param_value = match.groups()
            located.append((
                match.start(),
                {"name": tool_name, "parameters": {param_name: param_value.strip()}},
            ))

        for match in self._TOOL_CALL_BLOCK.finditer(text):
            payload = match.group(1).strip()
            if not payload.startswith("{"):
                continue
            call = self._decode_json_tool_call(payload)
            if call is not None:
                located.append((match.start(), call))

        located.sort(key=lambda item: item[0])
        return [call for _, call in located]

    def execute_tool_call(self, tool_call):
        """Dispatch a parsed tool call and return the tool's result dict.

        Unknown tools yield a failed result with the same keys as image
        results (``success``, ``error``, ``image``).
        """
        tool_name = tool_call["name"]
        parameters = tool_call["parameters"]

        if tool_name == "generate_image":
            return self.generate_image_tool(parameters.get("prompt", ""))
        return {"success": False, "error": f"Unknown tool: {tool_name}", "image": None}

    def chat_with_tools(self, messages):
        """Run one generation pass over ``messages`` with the tool schema attached.

        Args:
            messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

        Returns:
            The newly generated text, or an ``"Error generating response: ..."``
            string if anything fails.
        """
        try:
            # Apply chat template with tools
            inputs = self.tokenizer.apply_chat_template(
                messages,
                enable_thinking=False,
                xml_tools=self.tools,
                add_generation_prompt=True,
                tokenize=True,
                return_tensors="pt",
            )
            inputs = inputs.to(self.device)

            # Generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    # Single unpadded sequence: every position is a real token.
                    attention_mask=torch.ones_like(inputs),
                    max_new_tokens=1024,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # Keep only the tokens produced after the prompt. Slicing token
            # ids is robust, whereas slicing the decoded string by the decoded
            # prompt length breaks whenever decoding is not prefix-stable.
            prompt_length = inputs.shape[-1]
            new_tokens = outputs[0][prompt_length:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        except Exception as e:
            return f"Error generating response: {str(e)}"

    def process_conversation(self, user_message, history, hf_token):
        """Handle one chat turn, executing any tool calls the model emits.

        Args:
            user_message: The new user message.
            history: List of ``[user, assistant]`` pairs; the new turn is
                appended to it in place. ``None`` is treated as empty.
            hf_token: Token from the UI. Applied whenever it is non-empty and
                differs from the stored one, so a corrected token takes effect.

        Returns:
            ``(history, "", generated_image)`` where the empty string clears
            the input box and ``generated_image`` is a PIL image or ``None``.
        """
        if hf_token:
            cleaned_token = hf_token.strip()
            if cleaned_token and cleaned_token != self.hf_token:
                self.set_hf_token(cleaned_token)

        if history is None:
            history = []

        # Build message history
        messages = []
        for user_turn, assistant_turn in history:
            messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": user_message})

        # Get model response
        assistant_response = self.chat_with_tools(messages)

        # Check for tool calls in the response
        tool_calls = self.parse_tool_calls(assistant_response)
        generated_image = None
        final_response = assistant_response

        if tool_calls:
            # Execute tool calls
            tool_results = []
            for tool_call in tool_calls:
                result = self.execute_tool_call(tool_call)
                tool_results.append(result)

                if tool_call["name"] == "generate_image" and result.get("image") is not None:
                    generated_image = result["image"]

            # Continue conversation with tool results
            messages.append({"role": "assistant", "content": assistant_response})

            # Tool results are fed back as a user-role message.
            tool_summary = "\n".join(
                f"Tool {i+1} result: {result.get('message', result.get('error', 'Unknown result'))}"
                for i, result in enumerate(tool_results)
            )
            messages.append({"role": "user", "content": f"Tool execution results: {tool_summary}\n\nPlease respond to the user about the results."})

            # Get final response
            final_response = self.chat_with_tools(messages)

        # Update history
        history.append([user_message, final_response])

        return history, "", generated_image
190
+
191
# Shared module-level instance used by the UI event handlers; constructing it
# runs SmolLMWithTools.__init__ as soon as this module is executed.
chat_system = SmolLMWithTools()
193
+
194
def create_interface():
    """Build and return the Gradio Blocks app for chatting with SmolLM3.

    The layout has a token box, chat window, message box and Send/Clear
    buttons in the left column and the generated image in the right column.
    Both the Send button and pressing Enter route the message through
    ``chat_system.process_conversation``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="SmolLM3 Tool Calling + FLUX", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
        # 🤖🛠️ SmolLM3 with Tool Calling + FLUX

        SmolLM3 can autonomously decide when to generate images based on your conversation!
        Just chat naturally - the model will call the image generation tool when appropriate.

        **Examples:**
        - "Can you create a picture of a sunset?"
        - "I need an image of a robot for my presentation"
        - "Draw me a fantasy landscape"
        - "Show me what a purple elephant would look like"
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # HF Token input
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    placeholder="Enter your HF token for image generation",
                    type="password",
                )

                # Chat interface
                chatbot = gr.Chatbot(
                    label="Chat with SmolLM3 (Tool Calling Enabled)",
                    height=500,
                    show_copy_button=True,
                )

                msg_input = gr.Textbox(
                    label="Message",
                    placeholder="Ask for anything - SmolLM3 will decide if it needs to generate an image...",
                    lines=3,
                )

                with gr.Row():
                    send_btn = gr.Button("Send 📤", variant="primary")
                    clear_btn = gr.Button("Clear 🗑️")

            with gr.Column(scale=1):
                image_output = gr.Image(
                    label="Generated Images",
                    height=500,
                )

                gr.Markdown("""
                ### 🔧 Available Tools:
                - **generate_image**: Creates images from text descriptions

                The model decides autonomously when to use tools based on context!
                """)

        # Event handlers
        def respond(message, history, hf_token):
            """Forward a non-blank message to the chat system; otherwise do nothing."""
            if not message or not message.strip():
                # Blank (or missing) input: keep the chat, clear the box and image.
                return history, "", None
            return chat_system.process_conversation(message, history, hf_token)

        # The Send button and the Enter key share one wiring definition.
        respond_wiring = {
            "inputs": [msg_input, chatbot, hf_token_input],
            "outputs": [chatbot, msg_input, image_output],
        }
        send_btn.click(respond, **respond_wiring)
        msg_input.submit(respond, **respond_wiring)

        # Clear chat
        clear_btn.click(
            lambda: ([], None),
            outputs=[chatbot, image_output],
        )

        gr.Markdown("""
        ### 📝 Setup Instructions:
        1. **Get HF Token**: Visit [HuggingFace Tokens](https://huggingface.co/settings/tokens)
        2. **Create Token**: Generate a token with "Read" permissions
        3. **Enter Token**: Paste it in the field above
        4. **Start Chatting**: Ask for anything - images, questions, explanations!

        ### 🧠 How it Works:
        - SmolLM3 analyzes your message
        - Decides if it needs to call tools
        - Generates appropriate tool calls
        - Executes the tools and responds with results

        **The AI is in full control of when and how to use tools!**
        """)

    return app
291
+
292
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 without a public share link and with debug output enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app = create_interface()
    app.launch(**launch_options)