arenukvern committed
Commit 50606af · Parent: 2d84b2a

feat: comic postcards generation

Files changed (3):
  1. Gradio_UI.py +100 -22
  2. README.md +27 -5
  3. app.py +41 -7
Gradio_UI.py CHANGED
@@ -19,12 +19,45 @@ import re
 import shutil
 from typing import Optional
 
-from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
+from smolagents.agent_types import (
+    AgentAudio,
+    AgentImage,
+    AgentText,
+    ImageType,
+    is_torch_available,
+    AgentType,
+)
 from smolagents.agents import ActionStep, MultiStepAgent
 from smolagents.memory import MemoryStep
 from smolagents.utils import _is_package_available
 
 
+_AGENT_TYPE_MAPPING = {"string": AgentText, "image": AgentImage, "audio": AgentAudio}
+
+
+def handle_agent_output_types(output, output_type=None):
+    if output_type in _AGENT_TYPE_MAPPING:
+        # If the class has defined outputs, we can map directly according to the class definition
+        decoded_outputs = _AGENT_TYPE_MAPPING[output_type](output)
+        return decoded_outputs
+
+    # If the class does not have defined output, then we map according to the type
+    if isinstance(output, str):
+        return AgentText(output)
+    if isinstance(output, ImageType):
+        return AgentImage(output)
+    if isinstance(output, (list, tuple)) and all(
+        isinstance(item, ImageType) for item in output
+    ):
+        return [AgentImage(img) for img in output]
+    if is_torch_available():
+        import torch
+
+        if isinstance(output, torch.Tensor):
+            return AgentAudio(output)
+    return output
+
+
 def pull_messages_from_step(
     step_log: MemoryStep,
 ):
@@ -33,7 +66,9 @@ def pull_messages_from_step(
 
     if isinstance(step_log, ActionStep):
         # Output the step number
-        step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        step_number = (
+            f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        )
         yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
 
         # First yield the thought/reasoning from the LLM
@@ -41,9 +76,15 @@ def pull_messages_from_step(
         # Clean up the LLM output
         model_output = step_log.model_output.strip()
         # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
-        model_output = re.sub(r"```\s*<end_code>", "```", model_output)  # handles ```<end_code>
-        model_output = re.sub(r"<end_code>\s*```", "```", model_output)  # handles <end_code>```
-        model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)  # handles ```\n<end_code>
+        model_output = re.sub(
+            r"```\s*<end_code>", "```", model_output
+        )  # handles ```<end_code>
+        model_output = re.sub(
+            r"<end_code>\s*```", "```", model_output
+        )  # handles <end_code>```
+        model_output = re.sub(
+            r"```\s*\n\s*<end_code>", "```", model_output
+        )  # handles ```\n<end_code>
         model_output = model_output.strip()
         yield gr.ChatMessage(role="assistant", content=model_output)
 
@@ -63,8 +104,12 @@ def pull_messages_from_step(
 
         if used_code:
             # Clean up the content by removing any end code tags
-            content = re.sub(r"```.*?\n", "", content)  # Remove existing code blocks
-            content = re.sub(r"\s*<end_code>\s*", "", content)  # Remove end_code tags
+            content = re.sub(
+                r"```.*?\n", "", content
+            )  # Remove existing code blocks
+            content = re.sub(
+                r"\s*<end_code>\s*", "", content
+            )  # Remove end_code tags
             content = content.strip()
             if not content.startswith("```python"):
                 content = f"```python\n{content}\n```"
@@ -90,7 +135,11 @@ def pull_messages_from_step(
             yield gr.ChatMessage(
                 role="assistant",
                 content=f"{log_content}",
-                metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
+                metadata={
+                    "title": "📝 Execution Logs",
+                    "parent_id": parent_id,
+                    "status": "done",
+                },
             )
 
         # Nesting any errors under the tool call
@@ -98,7 +147,11 @@ def pull_messages_from_step(
             yield gr.ChatMessage(
                 role="assistant",
                 content=str(step_log.error),
-                metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
+                metadata={
+                    "title": "💥 Error",
+                    "parent_id": parent_id,
+                    "status": "done",
+                },
             )
 
         # Update parent message metadata to done status without yielding a new message
@@ -106,17 +159,25 @@ def pull_messages_from_step(
 
         # Handle standalone errors but not from tool calls
        elif hasattr(step_log, "error") and step_log.error is not None:
-            yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "💥 Error"})
+            yield gr.ChatMessage(
+                role="assistant",
+                content=str(step_log.error),
+                metadata={"title": "💥 Error"},
+            )
 
         # Calculate duration and token information
         step_footnote = f"{step_number}"
-        if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
-            token_str = (
-                f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
-            )
+        if hasattr(step_log, "input_token_count") and hasattr(
+            step_log, "output_token_count"
+        ):
+            token_str = f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
            step_footnote += token_str
        if hasattr(step_log, "duration"):
-            step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None
+            step_duration = (
+                f" | Duration: {round(float(step_log.duration), 2)}"
+                if step_log.duration
+                else None
+            )
            step_footnote += step_duration
         step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
         yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
@@ -139,7 +200,9 @@ def stream_to_gradio(
     total_input_tokens = 0
     total_output_tokens = 0
 
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
+    for step_log in agent.run(
+        task, stream=True, reset=reset_agent_memory, additional_args=additional_args
+    ):
         # Track tokens if model provides them
         if hasattr(agent.model, "last_input_token_count"):
             total_input_tokens += agent.model.last_input_token_count
@@ -155,7 +218,6 @@ def stream_to_gradio(
 
     final_answer = step_log  # Last log is the run's final_answer
     final_answer = handle_agent_output_types(final_answer)
-
     if isinstance(final_answer, AgentText):
         yield gr.ChatMessage(
             role="assistant",
@@ -166,13 +228,23 @@ def stream_to_gradio(
             role="assistant",
             content={"path": final_answer.to_string(), "mime_type": "image/png"},
         )
+    elif isinstance(final_answer, list) and all(
+        isinstance(img, AgentImage) for img in final_answer
+    ):
+        for img in final_answer:
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": img.to_string(), "mime_type": "image/png"},
+            )
     elif isinstance(final_answer, AgentAudio):
         yield gr.ChatMessage(
             role="assistant",
             content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
         )
     else:
-        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
+        yield gr.ChatMessage(
+            role="assistant", content=f"**Final answer:** {str(final_answer)}"
+        )
 
 
 class GradioUI:
@@ -242,10 +314,14 @@ class GradioUI:
         sanitized_name = "".join(sanitized_name)
 
         # Save the uploaded file to the specified folder
-        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
+        file_path = os.path.join(
+            self.file_upload_folder, os.path.basename(sanitized_name)
+        )
         shutil.copy(file.name, file_path)
 
-        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
+        return gr.Textbox(
+            f"File uploaded: {file_path}", visible=True
+        ), file_uploads_log + [file_path]
 
     def log_user_message(self, text_input, file_uploads_log):
         return (
@@ -277,7 +353,9 @@ class GradioUI:
         # If an upload folder is provided, enable the upload feature
         if self.file_upload_folder is not None:
             upload_file = gr.File(label="Upload a file")
-            upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+            upload_status = gr.Textbox(
+                label="Upload Status", interactive=False, visible=False
+            )
             upload_file.change(
                 self.upload_file,
                 [upload_file, file_uploads_log],
@@ -293,4 +371,4 @@ class GradioUI:
     demo.launch(debug=True, share=True, **kwargs)
 
 
-__all__ = ["stream_to_gradio", "GradioUI"]
+__all__ = ["stream_to_gradio", "GradioUI"]
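
The vendored `handle_agent_output_types` is what lets the comic flow survive type-handling: a tool result that is a list of images is wrapped as a list of `AgentImage` and then hits the new multi-image branch in `stream_to_gradio`. A minimal sketch of that mapping, assuming `smolagents` and Pillow are installed (the greeting and placeholder panels below are made up for illustration):

```python
# Minimal sketch: exercise the vendored handle_agent_output_types directly.
# Assumes smolagents and Pillow are installed; the panels are placeholder images.
from PIL import Image

from Gradio_UI import handle_agent_output_types
from smolagents.agent_types import AgentImage, AgentText

panels = [Image.new("RGB", (64, 64), color) for color in ("pink", "lavender")]

single = handle_agent_output_types("Happy birthday!")  # plain str -> AgentText
series = handle_agent_output_types(panels)  # list of PIL images -> [AgentImage, ...]

assert isinstance(single, AgentText)
assert all(isinstance(p, AgentImage) for p in series)
# Each AgentImage.to_string() returns a temp-file path, which is what the new
# elif branch in stream_to_gradio feeds to gr.ChatMessage one panel at a time.
print([p.to_string() for p in series])
```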
README.md CHANGED
@@ -8,11 +8,33 @@ sdk_version: 5.15.0
 app_file: app.py
 pinned: false
 tags:
-  - smolagents
-  - agent
-  - smolagent
-  - tool
-  - agent-course
+  - smolagents
+  - agent
+  - smolagent
+  - tool
+  - agent-course
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## To run the app locally:
+
+```bash
+source .venv/bin/activate
+
+pip install -r requirements.txt
+
+python app.py
+```
+
+Make sure you have installed huggingface_hub:
+
+```bash
+pip install --upgrade huggingface_hub
+```
+
+## Supported prompts:
+
+- Generate a postcard with a greeting: <greeting>
+- Generate postcard comic story images for the following greeting: <greeting>
+  Add all images to the final answer.
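
The README's huggingface_hub note exists because every tool in app.py calls a hosted model. A minimal sketch of the one-time login that implies (the token string is a placeholder; `huggingface-cli login` in a shell works just as well):

```python
# One-time authentication so the HfApiModel calls in app.py can reach the
# hosted Qwen/Qwen2.5-Coder-32B-Instruct endpoint.
from huggingface_hub import login

login(token="hf_your_token_here")  # placeholder token, not a real credential
```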
app.py CHANGED
@@ -10,10 +10,10 @@ from Gradio_UI import GradioUI
 
 
 @tool
-def get_color_palette_based_on_emotion(emotion: str) -> str:
-    """A tool that creates a color palette for a given emotion.
+def get_color_palette_based_on_emotions(emotions: str) -> str:
+    """A tool that creates a color palette for the given emotions.
     Args:
-        emotion: the emotion to create the color palette for.
+        emotions: the emotions to create the color palette for.
     """
     model = HfApiModel(
         max_tokens=2096,
@@ -27,14 +27,14 @@ def get_color_palette_based_on_emotions(emotions: str) -> str:
             "role": "system",
             "content": (
                 "ROLE: you are a cute color palette generator. "
-                "TASK: take the emotion and create a color palette which will complement the emotion. Use only pastel colors. "
+                "TASK: take the emotions and create a color palette which will complement the emotions. Use only pastel colors. "
                 "OUTPUT: a list of 6 colors in hex format. "
                 "CONSTRAINTS: Do not add any words or explanations. Just return the list of colors."
             ),
         },
         {
             "role": "user",
-            "content": emotion,
+            "content": emotions,
         },
     ]
     response = model(messages, stop_sequences=["END"])
@@ -65,7 +65,7 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
                 "TASK: take the color palette and greeting and create a postcard image prompt which symbolizes the greeting. "
                 "Use simple vector shapes, and pastel colors from the color palette. "
                 "OUTPUT: a postcard image prompt. "
-                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting in the image prompt so it is rendered as text. "
+                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting in the image prompt so it is rendered as text. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
             ),
         },
         {
@@ -77,6 +77,39 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
     return response.content
 
 
+@tool
+def get_comic_story_series_prompts_based_on_greeting(greeting: str) -> str:
+    """A tool that generates a series of 4 comic-style postcard prompts based on a greeting, creating a sequential story used to generate 4 postcard images.
+    Args:
+        greeting: the greeting to create the image prompts for.
+    """
+    model = HfApiModel(
+        max_tokens=2096,
+        temperature=0.5,
+        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+        custom_role_conversions=None,
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "ROLE: you are a cute story writer for a postcard series. "
+                "TASK: take the greeting and create a series of 4 postcard image prompts which symbolize the greeting. "
+                "Use simple vector shapes, and pastel colors. "
+                "OUTPUT: a series of 4 postcard image prompts. "
+                "CONSTRAINTS: Use only pastel colors. Use simple texts to create the story. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
+            ),
+        },
+        {
+            "role": "user",
+            "content": f"Greeting: {greeting}",
+        },
+    ]
+    response = model(messages, stop_sequences=["END"])
+    return response.content
+
+
 final_answer = FinalAnswerTool()
 
 
@@ -101,8 +134,9 @@ agent = CodeAgent(
     model=model,
     tools=[
         final_answer,
-        get_color_palette_based_on_emotion,
+        get_color_palette_based_on_emotions,
         get_postcard_prompt_based_on_color_palette_and_greeting,
+        get_comic_story_series_prompts_based_on_greeting,
         image_generation_tool,
     ],  ## add your tools here (don't remove final answer)
     max_steps=6,
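
For context, this is roughly the program the CodeAgent is expected to write for the second supported prompt. It is a hedged sketch, not code from the commit: inside the agent's Python sandbox the registered tools are injected as plain callables, and splitting the returned prompt text into four panels is an assumption, since the story tool hands back all four prompts as one string.

```python
# Hedged sketch of the comic flow, as it would run inside the CodeAgent
# sandbox where the tools registered in app.py are available as callables.
story = get_comic_story_series_prompts_based_on_greeting(
    greeting="Happy New Year!"
)

# Assumption: panel prompts come back separated by blank lines; keep four.
panel_prompts = [p.strip() for p in story.split("\n\n") if p.strip()][:4]

images = [image_generation_tool(prompt) for prompt in panel_prompts]

# Returning the list lets the new stream_to_gradio branch render each
# AgentImage as its own chat message.
final_answer(images)
```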