arenukvern committed
Commit 50606af · Parent: 2d84b2a

feat: comic postcards generation

Files changed (3):
  1. Gradio_UI.py +100 -22
  2. README.md +27 -5
  3. app.py +41 -7
Gradio_UI.py CHANGED
@@ -19,12 +19,45 @@ import re
 import shutil
 from typing import Optional
 
-from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
+from smolagents.agent_types import (
+    AgentAudio,
+    AgentImage,
+    AgentText,
+    ImageType,
+    is_torch_available,
+    AgentType,
+)
 from smolagents.agents import ActionStep, MultiStepAgent
 from smolagents.memory import MemoryStep
 from smolagents.utils import _is_package_available
 
 
+_AGENT_TYPE_MAPPING = {"string": AgentText, "image": AgentImage, "audio": AgentAudio}
+
+
+def handle_agent_output_types(output, output_type=None):
+    if output_type in _AGENT_TYPE_MAPPING:
+        # If the class has defined outputs, we can map directly according to the class definition
+        decoded_outputs = _AGENT_TYPE_MAPPING[output_type](output)
+        return decoded_outputs
+
+    # If the class does not have defined output, then we map according to the type
+    if isinstance(output, str):
+        return AgentText(output)
+    if isinstance(output, ImageType):
+        return AgentImage(output)
+    if isinstance(output, (list, tuple)) and all(
+        isinstance(item, ImageType) for item in output
+    ):
+        return [AgentImage(img) for img in output]
+    if is_torch_available():
+        import torch
+
+        if isinstance(output, torch.Tensor):
+            return AgentAudio(output)
+    return output
+
+
 def pull_messages_from_step(
     step_log: MemoryStep,
 ):
@@ -33,7 +66,9 @@ def pull_messages_from_step(
 
     if isinstance(step_log, ActionStep):
         # Output the step number
-        step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        step_number = (
+            f"Step {step_log.step_number}" if step_log.step_number is not None else ""
+        )
         yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
 
         # First yield the thought/reasoning from the LLM
@@ -41,9 +76,15 @@ def pull_messages_from_step(
         # Clean up the LLM output
         model_output = step_log.model_output.strip()
         # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
-        model_output = re.sub(r"```\s*<end_code>", "```", model_output)  # handles ```<end_code>
-        model_output = re.sub(r"<end_code>\s*```", "```", model_output)  # handles <end_code>```
-        model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)  # handles ```\n<end_code>
+        model_output = re.sub(
+            r"```\s*<end_code>", "```", model_output
+        )  # handles ```<end_code>
+        model_output = re.sub(
+            r"<end_code>\s*```", "```", model_output
+        )  # handles <end_code>```
+        model_output = re.sub(
+            r"```\s*\n\s*<end_code>", "```", model_output
+        )  # handles ```\n<end_code>
         model_output = model_output.strip()
         yield gr.ChatMessage(role="assistant", content=model_output)
 
@@ -63,8 +104,12 @@ def pull_messages_from_step(
 
         if used_code:
             # Clean up the content by removing any end code tags
-            content = re.sub(r"```.*?\n", "", content)  # Remove existing code blocks
-            content = re.sub(r"\s*<end_code>\s*", "", content)  # Remove end_code tags
+            content = re.sub(
+                r"```.*?\n", "", content
+            )  # Remove existing code blocks
+            content = re.sub(
+                r"\s*<end_code>\s*", "", content
+            )  # Remove end_code tags
             content = content.strip()
             if not content.startswith("```python"):
                 content = f"```python\n{content}\n```"
@@ -90,7 +135,11 @@ def pull_messages_from_step(
             yield gr.ChatMessage(
                 role="assistant",
                 content=f"{log_content}",
-                metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
+                metadata={
+                    "title": "📝 Execution Logs",
+                    "parent_id": parent_id,
+                    "status": "done",
+                },
             )
 
         # Nesting any errors under the tool call
@@ -98,7 +147,11 @@ def pull_messages_from_step(
             yield gr.ChatMessage(
                 role="assistant",
                 content=str(step_log.error),
-                metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
+                metadata={
+                    "title": "💥 Error",
+                    "parent_id": parent_id,
+                    "status": "done",
+                },
             )
 
         # Update parent message metadata to done status without yielding a new message
@@ -106,17 +159,25 @@ def pull_messages_from_step(
 
         # Handle standalone errors but not from tool calls
        elif hasattr(step_log, "error") and step_log.error is not None:
-            yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "💥 Error"})
+            yield gr.ChatMessage(
+                role="assistant",
+                content=str(step_log.error),
+                metadata={"title": "💥 Error"},
+            )
 
         # Calculate duration and token information
         step_footnote = f"{step_number}"
-        if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
-            token_str = (
-                f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
-            )
+        if hasattr(step_log, "input_token_count") and hasattr(
+            step_log, "output_token_count"
+        ):
+            token_str = f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
            step_footnote += token_str
        if hasattr(step_log, "duration"):
-            step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None
+            step_duration = (
+                f" | Duration: {round(float(step_log.duration), 2)}"
+                if step_log.duration
+                else None
+            )
            step_footnote += step_duration
         step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
         yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
@@ -139,7 +200,9 @@ def stream_to_gradio(
     total_input_tokens = 0
     total_output_tokens = 0
 
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
+    for step_log in agent.run(
+        task, stream=True, reset=reset_agent_memory, additional_args=additional_args
+    ):
         # Track tokens if model provides them
         if hasattr(agent.model, "last_input_token_count"):
             total_input_tokens += agent.model.last_input_token_count
@@ -155,7 +218,6 @@ def stream_to_gradio(
 
     final_answer = step_log  # Last log is the run's final_answer
     final_answer = handle_agent_output_types(final_answer)
-
     if isinstance(final_answer, AgentText):
         yield gr.ChatMessage(
             role="assistant",
@@ -166,13 +228,23 @@ def stream_to_gradio(
             role="assistant",
             content={"path": final_answer.to_string(), "mime_type": "image/png"},
         )
+    elif isinstance(final_answer, list) and all(
+        isinstance(img, AgentImage) for img in final_answer
+    ):
+        for img in final_answer:
+            yield gr.ChatMessage(
+                role="assistant",
+                content={"path": img.to_string(), "mime_type": "image/png"},
+            )
     elif isinstance(final_answer, AgentAudio):
         yield gr.ChatMessage(
             role="assistant",
             content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
         )
     else:
-        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
+        yield gr.ChatMessage(
+            role="assistant", content=f"**Final answer:** {str(final_answer)}"
+        )
 
 
 class GradioUI:
@@ -242,10 +314,14 @@ class GradioUI:
         sanitized_name = "".join(sanitized_name)
 
         # Save the uploaded file to the specified folder
-        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
+        file_path = os.path.join(
+            self.file_upload_folder, os.path.basename(sanitized_name)
+        )
         shutil.copy(file.name, file_path)
 
-        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]
+        return gr.Textbox(
+            f"File uploaded: {file_path}", visible=True
+        ), file_uploads_log + [file_path]
 
     def log_user_message(self, text_input, file_uploads_log):
         return (
@@ -277,7 +353,9 @@ class GradioUI:
         # If an upload folder is provided, enable the upload feature
         if self.file_upload_folder is not None:
             upload_file = gr.File(label="Upload a file")
-            upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
+            upload_status = gr.Textbox(
+                label="Upload Status", interactive=False, visible=False
+            )
             upload_file.change(
                 self.upload_file,
                 [upload_file, file_uploads_log],
@@ -293,4 +371,4 @@ class GradioUI:
     demo.launch(debug=True, share=True, **kwargs)
 
 
-__all__ = ["stream_to_gradio", "GradioUI"]
+__all__ = ["stream_to_gradio", "GradioUI"]
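
The vendored `handle_agent_output_types` is what lets the comic flow survive type-handling: a tool result that is a list of images is wrapped as a list of `AgentImage` and then hits the new multi-image branch in `stream_to_gradio`. A minimal sketch of that mapping, assuming `smolagents` and Pillow are installed (the greeting and placeholder panels below are made up for illustration):

```python
# Minimal sketch: exercise the vendored handle_agent_output_types directly.
# Assumes smolagents and Pillow are installed; the panels are placeholder images.
from PIL import Image

from Gradio_UI import handle_agent_output_types
from smolagents.agent_types import AgentImage, AgentText

panels = [Image.new("RGB", (64, 64), color) for color in ("pink", "lavender")]

single = handle_agent_output_types("Happy birthday!")  # plain str -> AgentText
series = handle_agent_output_types(panels)  # list of PIL images -> [AgentImage, ...]

assert isinstance(single, AgentText)
assert all(isinstance(p, AgentImage) for p in series)
# Each AgentImage.to_string() returns a temp-file path, which is what the new
# elif branch in stream_to_gradio feeds to gr.ChatMessage one panel at a time.
print([p.to_string() for p in series])
```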
README.md CHANGED
@@ -8,11 +8,33 @@ sdk_version: 5.15.0
 app_file: app.py
 pinned: false
 tags:
-  - smolagents
-  - agent
-  - smolagent
-  - tool
-  - agent-course
+  - smolagents
+  - agent
+  - smolagent
+  - tool
+  - agent-course
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## To run the app locally:
+
+```bash
+source .venv/bin/activate
+
+pip install -r requirements.txt
+
+python app.py
+```
+
+Make sure you have installed huggingface_hub:
+
+```bash
+pip install --upgrade huggingface_hub
+```
+
+## Supported prompts:
+
+- Generate a postcard with a greeting: <greeting>
+- Generate postcard comic story images for the following greeting: <greeting>
+  Add all images to the final answer.
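
The README's huggingface_hub note exists because every tool in app.py calls a hosted model. A minimal sketch of the one-time login that implies (the token string is a placeholder; `huggingface-cli login` in a shell works just as well):

```python
# One-time authentication so the HfApiModel calls in app.py can reach the
# hosted Qwen/Qwen2.5-Coder-32B-Instruct endpoint.
from huggingface_hub import login

login(token="hf_your_token_here")  # placeholder token, not a real credential
```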
app.py CHANGED
@@ -10,10 +10,10 @@ from Gradio_UI import GradioUI
 
 
 @tool
-def get_color_palette_based_on_emotion(emotion: str) -> str:
-    """A tool that creates a color palette for a given emotion.
+def get_color_palette_based_on_emotions(emotions: str) -> str:
+    """A tool that creates a color palette for the given emotions.
     Args:
-        emotion: the emotion to create the color palette for.
+        emotions: the emotions to create the color palette for.
     """
     model = HfApiModel(
         max_tokens=2096,
@@ -27,14 +27,14 @@ def get_color_palette_based_on_emotions(emotions: str) -> str:
             "role": "system",
             "content": (
                 "ROLE: you are a cute color palette generator. "
-                "TASK: take the emotion and create a color palette which will complement the emotion. Use only pastel colors. "
+                "TASK: take the emotions and create a color palette which will complement the emotions. Use only pastel colors. "
                 "OUTPUT: a list of 6 colors in hex format. "
                 "CONSTRAINTS: Do not add any words or explanations. Just return the list of colors."
             ),
         },
         {
             "role": "user",
-            "content": emotion,
+            "content": emotions,
         },
     ]
     response = model(messages, stop_sequences=["END"])
@@ -65,7 +65,7 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
                 "TASK: take the color palette and greeting and create a postcard image prompt which symbolizes the greeting. "
                 "Use simple vector shapes, and pastel colors from the color palette. "
                 "OUTPUT: a postcard image prompt. "
-                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting in the image prompt so it is rendered as text. "
+                "CONSTRAINTS: Use only the colors from the color palette. Include the greeting in the image prompt so it is rendered as text. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
             ),
         },
         {
@@ -77,6 +77,39 @@ def get_postcard_prompt_based_on_color_palette_and_greeting(
     return response.content
 
 
+@tool
+def get_comic_story_series_prompts_based_on_greeting(greeting: str) -> str:
+    """A tool that generates a series of 4 comic-style postcard prompts based on a greeting, creating a sequential story used to generate 4 postcard images.
+    Args:
+        greeting: the greeting to create the image prompts for.
+    """
+    model = HfApiModel(
+        max_tokens=2096,
+        temperature=0.5,
+        model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+        custom_role_conversions=None,
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "ROLE: you are a cute story writer for a postcard series. "
+                "TASK: take the greeting and create a series of 4 postcard image prompts which symbolize the greeting. "
+                "Use simple vector shapes, and pastel colors. "
+                "OUTPUT: a series of 4 postcard image prompts. "
+                "CONSTRAINTS: Use only pastel colors. Use simple texts to create the story. The prompt should be properly formatted as a string and contain escape characters for new lines, quotes and special characters. "
+            ),
+        },
+        {
+            "role": "user",
+            "content": f"Greeting: {greeting}",
+        },
+    ]
+    response = model(messages, stop_sequences=["END"])
+    return response.content
+
+
 final_answer = FinalAnswerTool()
 
 
@@ -101,8 +134,9 @@ agent = CodeAgent(
     model=model,
     tools=[
         final_answer,
-        get_color_palette_based_on_emotion,
+        get_color_palette_based_on_emotions,
         get_postcard_prompt_based_on_color_palette_and_greeting,
+        get_comic_story_series_prompts_based_on_greeting,
         image_generation_tool,
     ],  ## add your tools here (don't remove final answer)
     max_steps=6,
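
For context, this is roughly the program the CodeAgent is expected to write for the second supported prompt. It is a hedged sketch, not code from the commit: inside the agent's Python sandbox the registered tools are injected as plain callables, and splitting the returned prompt text into four panels is an assumption, since the story tool hands back all four prompts as one string.

```python
# Hedged sketch of the comic flow, as it would run inside the CodeAgent
# sandbox where the tools registered in app.py are available as callables.
story = get_comic_story_series_prompts_based_on_greeting(
    greeting="Happy New Year!"
)

# Assumption: panel prompts come back separated by blank lines; keep four.
panel_prompts = [p.strip() for p in story.split("\n\n") if p.strip()][:4]

images = [image_generation_tool(prompt) for prompt in panel_prompts]

# Returning the list lets the new stream_to_gradio branch render each
# AgentImage as its own chat message.
final_answer(images)
```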