VDC committed
Commit c5599c3 · 1 Parent(s): 3e9e130

added imagen gen

Files changed (3)
  1. .idea/.gitignore +3 -0
  2. .idea/vcs.xml +4 -0
  3. app.py +37 -2
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
.idea/vcs.xml ADDED
@@ -0,0 +1,4 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+ <component name="VcsDirectoryMappings" defaultProject="true" />
+ </project>
app.py CHANGED
@@ -4,10 +4,45 @@ import requests
  import pytz
  import yaml
  from tools.final_answer import FinalAnswerTool
-
  from Gradio_UI import GradioUI
+ from diffusers import StableDiffusionPipeline
+ import torch
+ from io import BytesIO
+ import base64

  # Below is an example of a tool that does nothing. Amaze us with your creativity !
+
+ @tool
+
+ class ImageGenerator:
+     def __init__(self, model_id="runwayml/stable-diffusion-v1-5", device="cuda" if torch.cuda.is_available() else "cpu"):
+         self.pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32).to(device)
+         self.device = device
+
+     def generate_image(self, prompt, num_inference_steps=25, guidance_scale=7.5):
+         """Generates an image from a text prompt."""
+         image = self.pipeline(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).images[0]
+         return image
+
+     def generate_base64_image(self, prompt, num_inference_steps=25, guidance_scale=7.5):
+         """Generates a base64 encoded image from a text prompt."""
+         image = self.generate_image(prompt, num_inference_steps, guidance_scale)
+         buffered = BytesIO()
+         image.save(buffered, format="PNG")
+         img_str = base64.b64encode(buffered.getvalue()).decode()
+         return img_str
+
+ def generate_image_tool(image_generator):
+     """Creates a tool function for image generation."""
+     def image_generation_tool(prompt):
+         """Generates an image from a prompt."""
+         return image_generator.generate_base64_image(prompt)
+     return image_generation_tool
+
+ # Initialize the ImageGenerator and tool
+ image_generator = ImageGenerator()
+ image_generation_tool_function = generate_image_tool(image_generator)
+
  @tool
  def generate_image_from_prompt(prompt: str) -> str:
      """Generates an image from a text prompt.
@@ -54,7 +89,7 @@ with open("prompts.yaml", 'r') as stream:

  agent = CodeAgent(
      model=model,
-     tools=[final_answer, get_current_time_in_timezone], ## add your tools here (don't remove final answer)
+     tools=[final_answer, generate_image_from_prompt, get_current_time_in_timezone], ## add your tools here (don't remove final answer)
      max_steps=6,
      verbosity_level=1,
      grammar=None,
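
For reference, a minimal usage sketch of the helpers added in this commit. It assumes ImageGenerator and generate_image_tool have been copied into a scratch script together with their imports (importing app.py directly would also run its module-level agent setup), and that diffusers, torch and Pillow are installed; the prompt and output filename below are purely illustrative, not part of the commit.

```python
# Hypothetical usage sketch, not part of the commit: call the committed helpers
# and turn the base64 PNG string they return back into a viewable image.
import base64
from io import BytesIO

from PIL import Image

generator = ImageGenerator()                 # loads runwayml/stable-diffusion-v1-5 (GPU if available)
image_tool = generate_image_tool(generator)  # returns a prompt -> base64-string callable

b64_png = image_tool("a watercolor fox in a misty forest")   # illustrative prompt

# generate_base64_image saved the PIL image as PNG and base64-encoded it,
# so the reverse path is: base64-decode, wrap in BytesIO, open with Pillow.
image = Image.open(BytesIO(base64.b64decode(b64_png)))
image.save("generated_fox.png")              # illustrative output path
```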