stevenbucaille committed on
Commit
3014996
·
0 Parent(s):

add app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PIL
2
+ import gradio as gr
3
+ from agents.all_agents import get_master_agent
4
+ from llm import get_default_model
5
+ from smolagents.gradio_ui import stream_to_gradio
6
+
7
# Register the local images/ directory as a static path with Gradio.
gr.set_static_paths(paths=["images/"])

# Build the single agent instance shared by every chat request.
default_model = get_default_model()
master_agent = get_master_agent(default_model)
print(master_agent)
11
+
12
+
13
def chat_interface_fn(input_request, history):
    """Run the master agent on a multimodal chat request and stream its output.

    Args:
        input_request: Payload of the multimodal textbox. ``"text"`` holds the
            user message and ``"files"`` the paths of the attached images
            (may be empty or ``None``).
        history: Chat history list; every message streamed by the agent is
            appended to it in place.

    Yields:
        ``(history, None)`` after each streamed agent message, then one final
        ``(history, final_images)`` where ``final_images`` is whatever the
        agent stored under ``"final_images"`` in its Python executor state
        (an empty list when nothing was stored).
    """
    # A bare ``import PIL`` does not load the ``PIL.Image`` submodule, so
    # import it explicitly instead of relying on another module having done so.
    from PIL import Image

    max_width, max_height = 1200, 800

    message = input_request["text"]
    image_paths = input_request["files"]
    print(message)
    print(image_paths)
    print(history)
    prompt = f"""
    You are given a message and possibly some images.
    The images are already loaded in the variable "images".

    The message is:
    {message}

    You can use the following tools to perform tasks on the image:
    - object_detection_tool: to detect objects in an image, you must provide the image to the agents.
    - object_detection_model_retriever: to retrieve object detection models, you must provide the type of class that a model can detect.

    If you don't know what model to use, you can use the object_detection_model_retriever tool to retrieve the model.
    Never assume an invented model name, always use the model name provided by the object_detection_model_retriever tool.

    Whenever you need to use a tool, first write the tool call in the form of a code block.
    Then, wait for the tool to return the result.
    Then, use the result to perform the task. Step by step.

    Before your final answer, if you have any images to show, store them in the "final_images" variable.
    Always return a text of what you did.
    """

    # Always define both lists so a text-only request reaches the agent
    # instead of failing on an undefined name.
    images = []
    resized_images = []
    for image_path in image_paths or []:
        image = Image.open(image_path)
        width, height = image.size

        if width > max_width or height > max_height:
            # Shrink to fit the bounding box while keeping the aspect ratio;
            # clamp to 1 px so extreme ratios never produce a zero-sized image.
            ratio = min(max_width / width, max_height / height)
            new_size = (
                max(1, int(width * ratio)),
                max(1, int(height * ratio)),
            )
            preview = image.resize(new_size, Image.Resampling.LANCZOS)
        else:
            # Already small enough: use the image itself as its preview so
            # ``resized_images`` stays aligned with ``images``.
            preview = image

        resized_images.append(preview)
        images.append(image)

    # Distinct loop name: do not shadow the user's ``message``.
    for agent_message in stream_to_gradio(
        master_agent,
        task=prompt,
        task_images=resized_images,
        additional_args={"images": images},
        reset_agent_memory=False,
    ):
        history.append(agent_message)
        yield history, None

    final_images = master_agent.python_executor.state.get("final_images", [])
    yield history, final_images
72
+
73
+
74
# Request pre-filled in the multimodal textbox when the page loads.
default_request = {
    "text": "Draw a bbox around each car in the image",
    "files": [
        {
            "url": "https://upload.wikimedia.org/wikipedia/commons/5/51/Crossing_the_Hudson_River_on_the_George_Washington_Bridge_from_Fort_Lee%2C_New_Jersey_to_Manhattan%2C_New_York_%287237796950%29.jpg",
            "path": "images/image.jpg",
            "name": "image.jpg",
        }
    ],
}

with gr.Blocks() as demo:
    # The gallery receives the second value yielded by chat_interface_fn.
    output_gallery = gr.Gallery(label="Output Gallery", type="pil")
    gr.ChatInterface(
        chat_interface_fn,
        type="messages",
        multimodal=True,
        textbox=gr.MultimodalTextbox(default_request),
        additional_outputs=[output_gallery],
    )


demo.launch()