Commit 50606af · Parent(s): 2d84b2a

feat: comic postcards generation

Files changed:
- Gradio_UI.py +100 -22
- README.md +27 -5
- app.py +41 -7
Gradio_UI.py CHANGED

@@ -19,12 +19,45 @@ import re
 import shutil
 from typing import Optional
 
-from smolagents.agent_types import
+from smolagents.agent_types import (
+    AgentAudio,
+    AgentImage,
+    AgentText,
+    ImageType,
+    is_torch_available,
+    AgentType,
+)
 from smolagents.agents import ActionStep, MultiStepAgent
 from smolagents.memory import MemoryStep
 from smolagents.utils import _is_package_available
 
 
+_AGENT_TYPE_MAPPING = {"string": AgentText, "image": AgentImage, "audio": AgentAudio}
+
+
+def handle_agent_output_types(output, output_type=None):
+    if output_type in _AGENT_TYPE_MAPPING:
+        # If the class has defined outputs, we can map directly according to the class definition
+        decoded_outputs = _AGENT_TYPE_MAPPING[output_type](output)
+        return decoded_outputs
+
+    # If the class does not have defined output, then we map according to the type
+    if isinstance(output, str):
+        return AgentText(output)
+    if isinstance(output, ImageType):
+        return AgentImage(output)
+    if isinstance(output, (list, tuple)) and all(
+        isinstance(item, ImageType) for item in output
+    ):
+        return [AgentImage(img) for img in output]
+    if is_torch_available():
+        import torch
+
+        if isinstance(output, torch.Tensor):
+            return AgentAudio(output)
+    return output
+
+
 def pull_messages_from_step(
     step_log: MemoryStep,
 ):

@@ -33,7 +66,9 @@ def pull_messages_from_step(
 
     if isinstance(step_log, ActionStep):
         # Output the step number
-        step_number =
+        step_number = (
+            f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        )
         yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
 
         # First yield the thought/reasoning from the LLM

@@ -41,9 +76,15 @@ def pull_messages_from_step(
             # Clean up the LLM output
            model_output = step_log.model_output.strip()
            # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
-            model_output = re.sub(
-
-
+            model_output = re.sub(
+                r"```\s*<end_code>", "```", model_output
+            )  # handles ```<end_code>
+            model_output = re.sub(
+                r"<end_code>\s*```", "```", model_output
+            )  # handles <end_code>```
+            model_output = re.sub(
+                r"```\s*\n\s*<end_code>", "```", model_output
+            )  # handles ```\n<end_code>
            model_output = model_output.strip()
            yield gr.ChatMessage(role="assistant", content=model_output)
 

@@ -63,8 +104,12 @@ def pull_messages_from_step(
 
            if used_code:
                # Clean up the content by removing any end code tags
-                content = re.sub(
-
+                content = re.sub(
+                    r"```.*?\n", "", content
+                )  # Remove existing code blocks
+                content = re.sub(
+                    r"\s*<end_code>\s*", "", content
+                )  # Remove end_code tags
                content = content.strip()
                if not content.startswith("```python"):
                    content = f"```python\n{content}\n```"

@@ -90,7 +135,11 @@ def pull_messages_from_step(
                    yield gr.ChatMessage(
                        role="assistant",
                        content=f"{log_content}",
-                        metadata={
+                        metadata={
+                            "title": "📝 Execution Logs",
+                            "parent_id": parent_id,
+                            "status": "done",
+                        },
                    )
 
            # Nesting any errors under the tool call

@@ -98,7 +147,11 @@ def pull_messages_from_step(
                yield gr.ChatMessage(
                    role="assistant",
                    content=str(step_log.error),
-                    metadata={
+                    metadata={
+                        "title": "💥 Error",
+                        "parent_id": parent_id,
+                        "status": "done",
+                    },
                )
 
            # Update parent message metadata to done status without yielding a new message

@@ -106,17 +159,25 @@ def pull_messages_from_step(
 
        # Handle standalone errors but not from tool calls
        elif hasattr(step_log, "error") and step_log.error is not None:
-            yield gr.ChatMessage(
+            yield gr.ChatMessage(
+                role="assistant",
+                content=str(step_log.error),
+                metadata={"title": "💥 Error"},
+            )
 
        # Calculate duration and token information
        step_footnote = f"{step_number}"
-        if hasattr(step_log, "input_token_count") and hasattr(
-
-
-
+        if hasattr(step_log, "input_token_count") and hasattr(
+            step_log, "output_token_count"
+        ):
+            token_str = f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
            step_footnote += token_str
        if hasattr(step_log, "duration"):
-            step_duration =
+            step_duration = (
+                f" | Duration: {round(float(step_log.duration), 2)}"
+                if step_log.duration
+                else None
+            )
            step_footnote += step_duration
        step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
        yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
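The `handle_agent_output_types` helper added above is what lets `stream_to_gradio` (changed below) hand back typed results, including the new list-of-images case. A minimal sketch of how it behaves, assuming this Space's Gradio_UI.py, smolagents, gradio and pillow are all importable (values are purely illustrative):

```python
from PIL import Image
from smolagents.agent_types import AgentImage, AgentText
from Gradio_UI import handle_agent_output_types

# A plain string is wrapped as AgentText, either via the explicit mapping or by type.
print(isinstance(handle_agent_output_types("Happy Birthday!"), AgentText))           # True
print(isinstance(handle_agent_output_types("Hi", output_type="string"), AgentText))  # True

# New in this commit: a list of PIL images becomes a list of AgentImage objects,
# which the stream_to_gradio changes below render as one chat message per image.
panels = [Image.new("RGB", (64, 64), "pink") for _ in range(4)]
print(all(isinstance(p, AgentImage) for p in handle_agent_output_types(panels)))      # True
```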
@@ -139,7 +200,9 @@ def stream_to_gradio(
    total_input_tokens = 0
    total_output_tokens = 0
 
-    for step_log in agent.run(
+    for step_log in agent.run(
+        task, stream=True, reset=reset_agent_memory, additional_args=additional_args
+    ):
        # Track tokens if model provides them
        if hasattr(agent.model, "last_input_token_count"):
            total_input_tokens += agent.model.last_input_token_count

@@ -155,7 +218,6 @@ def stream_to_gradio(
 
    final_answer = step_log  # Last log is the run's final_answer
    final_answer = handle_agent_output_types(final_answer)
-
    if isinstance(final_answer, AgentText):
        yield gr.ChatMessage(
            role="assistant",

@@ -166,13 +228,23 @@ def stream_to_gradio(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "image/png"},
        )
+    elif isinstance(final_answer, list) and all(
+        isinstance(img, AgentImage) for img in final_answer
+    ):
+        for img in final_answer:
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": img.to_string(), "mime_type": "image/png"},
+            )
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
        )
    else:
-        yield gr.ChatMessage(
+        yield gr.ChatMessage(
+            role="assistant", content=f"**Final answer:** {str(final_answer)}"
+        )
 
 
 class GradioUI:

@@ -242,10 +314,14 @@ class GradioUI:
        sanitized_name = "".join(sanitized_name)
 
        # Save the uploaded file to the specified folder
-        file_path = os.path.join(
+        file_path = os.path.join(
+            self.file_upload_folder, os.path.basename(sanitized_name)
+        )
        shutil.copy(file.name, file_path)
 
-        return gr.Textbox(
+        return gr.Textbox(
+            f"File uploaded: {file_path}", visible=True
+        ), file_uploads_log + [file_path]
 
    def log_user_message(self, text_input, file_uploads_log):
        return (

@@ -277,7 +353,9 @@ class GradioUI:
            # If an upload folder is provided, enable the upload feature
            if self.file_upload_folder is not None:
                upload_file = gr.File(label="Upload a file")
-                upload_status = gr.Textbox(
+                upload_status = gr.Textbox(
+                    label="Upload Status", interactive=False, visible=False
+                )
                upload_file.change(
                    self.upload_file,
                    [upload_file, file_uploads_log],

@@ -293,4 +371,4 @@ class GradioUI:
        demo.launch(debug=True, share=True, **kwargs)
 
 
-__all__ = ["stream_to_gradio", "GradioUI"]
+__all__ = ["stream_to_gradio", "GradioUI"]
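With `stream_to_gradio` and `GradioUI` exported via `__all__`, the streaming helper can also be exercised outside the Space UI. A minimal sketch, assuming smolagents and gradio are installed and a Hugging Face token is configured; the task text is only an example:

```python
from smolagents import CodeAgent, HfApiModel
from Gradio_UI import stream_to_gradio

# A bare agent is enough to see the message stream; the Space's app.py wires in the real tools.
agent = CodeAgent(model=HfApiModel(), tools=[])

for message in stream_to_gradio(agent, task="Say hello", reset_agent_memory=True):
    # After this commit, a final answer that is a list of AgentImage objects arrives
    # here as several gr.ChatMessage items, one image per message.
    print(message.role, type(message.content).__name__)
```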
README.md CHANGED

@@ -8,11 +8,33 @@ sdk_version: 5.15.0
 app_file: app.py
 pinned: false
 tags:
-- smolagents
-- agent
-- smolagent
-- tool
-- agent-course
+- smolagents
+- agent
+- smolagent
+- tool
+- agent-course
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## To run the app locally:
+
+```bash
+source .venv/bin/activate
+
+pip install -r requirements.txt
+
+python app.py
+```
+
+Make sure you have installed huggingface_hub:
+
+```bash
+pip install --upgrade huggingface_hub
+```
+
+## Supported prompts:
+
+- Generate a postcard with a greeting: <greeting>
+- Generate postcard comic story images for the following greeting: <greeting>
+  Add all images to the final answer.
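The huggingface_hub requirement exists because the app's HfApiModel calls the Hugging Face Inference API, which generally needs an access token. A minimal sketch of providing one before launching app.py (the token value is a placeholder):

```python
from huggingface_hub import login

# Either pass a token explicitly, call login() interactively, or export HF_TOKEN instead.
login(token="hf_xxx")  # placeholder token
```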
app.py CHANGED

@@ -10,10 +10,10 @@ from Gradio_UI import GradioUI
 
 
 @tool
-def
-"""A tool that creates the color palette of a given
+def get_color_palette_based_on_emotions(emotions: str) -> str:
+    """A tool that creates the color palette of a given emotions.
     Args:
-
+        emotions: the emotions to create the color palette of.
     """
     model = HfApiModel(
         max_tokens=2096,

@@ -27,14 +27,14 @@ def get_color_palette_based_on_emotion(emotion: str) -> str:
            "role": "system",
            "content": (
                "ROLE: you are a cute color palette generator. "
-                "TASK: take the
+                "TASK: take the emotions and create a color palette which will complement the emotions. Use only pastel colors. "
                "OUTPUT: a list of 6 colors in hex format. "
                "CONSTRAINTS: Do not add any words or explanations. Just return the list of colors."
            ),
        },
        {
            "role": "user",
-            "content":
+            "content": emotions,
        },
    ]
    response = model(messages, stop_sequences=["END"])

@@ -65,7 +65,7 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
                "TASK: take the color palette and greeting and create a postcard image prompt which simbolses the greeting. "
                "Use simple vector shapes, and pastel colors from the color palette. "
                "OUTPUT: a postcard image prompt. "
-                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting into the image prompt so it would be rendered as a text. "
+                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting into the image prompt so it would be rendered as a text. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
            ),
        },
        {

@@ -77,6 +77,39 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
    return response.content
 
 
+@tool
+def get_comic_story_series_prompts_based_on_greeting(greeting: str) -> str:
+    """A tool that generates a series of 4 comic-style postcard prompts based on a greeting, creating a sequential story to generate 4 postcards images.
+    Args:
+        greeting: the greeting to create the image prompts of.
+    """
+    model = HfApiModel(
+        max_tokens=2096,
+        temperature=0.5,
+        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+        custom_role_conversions=None,
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "ROLE: you are a cute story writer for a postcard series. "
+                "TASK: take the greeting and create a series of 4 postcard image prompts which simbolses the greeting. "
+                "Use simple vector shapes, and pastel colors. "
+                "OUTPUT: a series of 4 postcard image prompts. "
+                "CONSTRAINTS: Use only the colors from the color palette. Use simple texts to create the story. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
+            ),
+        },
+        {
+            "role": "user",
+            "content": f"Greeting: {greeting}",
+        },
+    ]
+    response = model(messages, stop_sequences=["END"])
+    return response.content
+
+
 final_answer = FinalAnswerTool()
 
 
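Each of these functions is turned into a smolagents Tool by the `@tool` decorator, which parses the type hints and the docstring's `Args:` section into the schema the CodeAgent sees, so the one-line description and per-argument lines above are load-bearing. A minimal sketch of the same pattern with a throwaway tool (hypothetical, not part of this commit):

```python
from smolagents import tool

@tool
def shout_greeting(greeting: str) -> str:
    """A tool that upper-cases a greeting.
    Args:
        greeting: the greeting to upper-case.
    """
    return greeting.upper()

# @tool returns a callable Tool object, so it can be sanity-checked directly.
print(shout_greeting(greeting="happy birthday"))  # HAPPY BIRTHDAY
```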
@@ -101,8 +134,9 @@ agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
-
+        get_color_palette_based_on_emotions,
        get_postcard_prompt_based_on_color_palette_and_greeting,
+        get_comic_story_series_prompts_based_on_greeting,
        image_generation_tool,
    ], ## add your tools here (don't remove final answer)
    max_steps=6,
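Taken together, the pieces are meant to let the agent answer the new comic prompt end to end: the comic tool drafts four panel prompts, each panel is rendered with image_generation_tool, and the list of images is handed to final_answer, which the updated Gradio_UI.py then displays one image per message. A rough sketch of the kind of code the CodeAgent is expected to write at run time; the splitting heuristic and the image tool's parameter name are assumptions, and this snippet is not part of app.py:

```python
# Runs inside the CodeAgent's Python executor, where each tool is available by name.
prompts_text = get_comic_story_series_prompts_based_on_greeting(greeting="Happy Birthday!")

# The tool returns free-form text containing 4 prompts; splitting on blank lines is one
# plausible way for the agent to separate them (the real split depends on the model output).
panel_prompts = [p.strip() for p in prompts_text.split("\n\n") if p.strip()][:4]

# 'prompt' as the parameter name of image_generation_tool is an assumption.
images = [image_generation_tool(prompt=p) for p in panel_prompts]

# A list of images triggers the new multi-image rendering added in Gradio_UI.py.
final_answer(images)
```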