bla committed on
Commit
92767db
·
verified ·
1 Parent(s): 6962f1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +341 -25
app.py CHANGED
@@ -122,6 +122,178 @@ custom_css = """
122
  font-size: 0.875rem;
123
  color: var(--card-foreground);
124
  opacity: 0.7;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
126
  """
127
 
@@ -336,6 +508,119 @@ class YOLOWorldDetector:
336
  # Initialize detector with default model
337
  detector = YOLOWorldDetector(model_size="small")
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  def detection_inference(image, text_prompt, confidence, model_size):
340
  # Update model if needed
341
  detector.change_model(model_size)
@@ -347,7 +632,14 @@ def detection_inference(image, text_prompt, confidence, model_size):
347
  confidence_threshold=confidence
348
  )
349
 
350
- return result_image, str(json_results)
 
 
 
 
 
 
 
351
 
352
  def segmentation_inference(image, confidence, model_name):
353
  # Run segmentation
@@ -357,7 +649,14 @@ def segmentation_inference(image, confidence, model_name):
357
  confidence_threshold=confidence
358
  )
359
 
360
- return result_image, str(json_results)
 
 
 
 
 
 
 
361
 
362
  # Create Gradio interface
363
  with gr.Blocks(title="YOLO Vision Suite", css=custom_css) as demo:
@@ -368,10 +667,10 @@ with gr.Blocks(title="YOLO Vision Suite", css=custom_css) as demo:
368
 
369
  with gr.Tabs(elem_classes="tab-nav") as tabs:
370
  with gr.TabItem("Object Detection", elem_id="detection-tab"):
371
- with gr.Row():
372
- with gr.Column(elem_classes="input-panel"):
373
  gr.Markdown("### Input")
374
- input_image = gr.Image(label="Upload Image", type="numpy")
375
  text_prompt = gr.Textbox(
376
  label="Text Prompt",
377
  placeholder="person, car, dog",
@@ -394,20 +693,23 @@ with gr.Blocks(title="YOLO Vision Suite", css=custom_css) as demo:
394
  )
395
  detect_button = gr.Button("Detect Objects", elem_classes="gr-button-primary")
396
 
397
- with gr.Column(elem_classes="output-panel"):
398
  gr.Markdown("### Results")
399
- output_image = gr.Image(label="Detection Result")
400
- with gr.Accordion("JSON Output", open=False):
 
 
401
  json_output = gr.Textbox(
402
  label="Bounding Box Data (Percentage Coordinates)",
403
- elem_classes="gr-input"
 
404
  )
405
 
406
  with gr.TabItem("Segmentation", elem_id="segmentation-tab"):
407
- with gr.Row():
408
- with gr.Column(elem_classes="input-panel"):
409
  gr.Markdown("### Input")
410
- seg_input_image = gr.Image(label="Upload Image", type="numpy")
411
  with gr.Row():
412
  seg_confidence = gr.Slider(
413
  minimum=0.1,
@@ -424,35 +726,49 @@ with gr.Blocks(title="YOLO Vision Suite", css=custom_css) as demo:
424
  )
425
  segment_button = gr.Button("Segment Image", elem_classes="gr-button-primary")
426
 
427
- with gr.Column(elem_classes="output-panel"):
428
  gr.Markdown("### Results")
429
- seg_output_image = gr.Image(label="Segmentation Result")
430
- with gr.Accordion("JSON Output", open=False):
 
 
431
  seg_json_output = gr.Textbox(
432
  label="Segmentation Data (Percentage Coordinates)",
433
- elem_classes="gr-input"
 
434
  )
435
 
436
  with gr.Column(elem_classes="footer"):
437
- gr.Markdown("""
438
- ### Tips
439
- - For object detection, enter comma-separated text prompts to specify what to detect
440
- - For segmentation, the model will identify common objects automatically
441
- - Larger models provide better accuracy but require more processing power
442
- - The JSON output provides coordinates as percentages of image dimensions, compatible with SVG
443
- """)
 
 
 
 
 
 
 
 
 
 
 
444
 
445
  # Set up event handlers
446
  detect_button.click(
447
  detection_inference,
448
  inputs=[input_image, text_prompt, confidence, model_dropdown],
449
- outputs=[output_image, json_output]
450
  )
451
 
452
  segment_button.click(
453
  segmentation_inference,
454
  inputs=[seg_input_image, seg_confidence, seg_model_dropdown],
455
- outputs=[seg_output_image, seg_json_output]
456
  )
457
 
458
  if __name__ == "__main__":
 
122
  font-size: 0.875rem;
123
  color: var(--card-foreground);
124
  opacity: 0.7;
125
+ }"""
126
# Custom CSS for a more modern UI inspired by NextUI
custom_css = """
:root {
    --primary: #0070f3;
    --primary-foreground: #ffffff;
    --background: #f5f5f5;
    --card: #ffffff;
    --card-foreground: #111111;
    --border: #eaeaea;
    --ring: #0070f3;
    --shadow: 0 4px 14px 0 rgba(0, 118, 255, 0.1);
    /* BUG FIX: --radius was referenced by var(--radius) throughout this
       sheet but never declared, so every border-radius fell back to the
       property's initial value. Declare it once here. */
    --radius: 0.75rem;
}

.dark {
    --primary: #0070f3;
    --primary-foreground: #ffffff;
    --background: #000000;
    --card: #111111;
    --card-foreground: #ffffff;
    --border: #333333;
    --ring: #0070f3;
}

.gradio-container {
    margin: 0 !important;
    padding: 0 !important;
    max-width: 100% !important;
}

.main-container {
    background-color: var(--background);
    padding: 2rem;
    min-height: 100vh;
}

.header {
    margin-bottom: 2rem;
    text-align: center;
}

.header h1 {
    font-size: 2.5rem;
    font-weight: 800;
    color: var(--card-foreground);
    margin-bottom: 0.5rem;
    background: linear-gradient(to right, #0070f3, #00bfff);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}

.header p {
    color: var(--card-foreground);
    opacity: 0.8;
    font-size: 1.1rem;
}

.tab-nav {
    background-color: var(--card);
    border-radius: var(--radius);
    padding: 0.5rem;
    margin-bottom: 2rem;
    box-shadow: var(--shadow);
}

.tab-nav button {
    border-radius: var(--radius) !important;
    font-weight: 600 !important;
    transition: all 0.2s ease-in-out !important;
    padding: 0.75rem 1.5rem !important;
}

.tab-nav button.selected {
    background-color: var(--primary) !important;
    color: var(--primary-foreground) !important;
    transform: translateY(-2px);
    box-shadow: 0 4px 14px 0 rgba(0, 118, 255, 0.25);
}

.input-panel, .output-panel {
    background-color: var(--card);
    border-radius: var(--radius);
    padding: 1.5rem;
    box-shadow: var(--shadow);
    height: 100%;
    display: flex;
    flex-direction: column;
}

.input-panel h3, .output-panel h3 {
    font-size: 1.25rem;
    font-weight: 600;
    margin-bottom: 1rem;
    color: var(--card-foreground);
    border-bottom: 2px solid var(--primary);
    padding-bottom: 0.5rem;
    display: inline-block;
}

.gr-button-primary {
    background-color: var(--primary) !important;
    color: var(--primary-foreground) !important;
    border-radius: var(--radius) !important;
    font-weight: 600 !important;
    transition: all 0.2s ease-in-out !important;
    padding: 0.75rem 1.5rem !important;
    box-shadow: 0 4px 14px 0 rgba(0, 118, 255, 0.25) !important;
    width: 100%;
    margin-top: 1rem;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(0, 118, 255, 0.35) !important;
}

.gr-form {
    border: none !important;
    background: transparent !important;
}

.gr-input, .gr-select {
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
    padding: 0.75rem 1rem !important;
    transition: all 0.2s ease-in-out !important;
}

.gr-input:focus, .gr-select:focus {
    border-color: var(--primary) !important;
    box-shadow: 0 0 0 2px rgba(0, 118, 255, 0.25) !important;
}

.gr-panel {
    border: none !important;
}

.gr-accordion {
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
    overflow: hidden;
}

.footer {
    margin-top: 2rem;
    border-top: 1px solid var(--border);
    padding-top: 1.5rem;
    font-size: 0.9rem;
    color: var(--card-foreground);
    opacity: 0.7;
    text-align: center;
}

.footer-card {
    background-color: var(--card);
    border-radius: var(--radius);
    padding: 1.5rem;
    box-shadow: var(--shadow);
}

.tips-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 1rem;
    margin-top: 1rem;
}

.tip-card {
    background-color: var(--card);
    border-radius: var(--radius);
    padding: 1rem;
    border-left: 3px solid var(--primary);
}
"""
299
 
 
508
  # Initialize detector with default model
509
  detector = YOLOWorldDetector(model_size="small")
510
 
511
def create_svg_from_detections(json_results, img_width, img_height):
    """Convert detection results to an SVG overlay string.

    Args:
        json_results: list of dicts, each with a "bbox" dict of percentage
            coordinates ("x", "y", "width", "height") and optional
            "label_text" / "score" keys.
        img_width: target image width in pixels.
        img_height: target image height in pixels.

    Returns:
        A complete ``<svg>`` document string with one labelled rectangle
        per detection.
    """
    # Local stdlib import so the file-level import block stays untouched.
    from xml.sax.saxutils import escape, quoteattr

    svg_header = f'<svg width="{img_width}" height="{img_height}" xmlns="http://www.w3.org/2000/svg">'
    svg_content = ""

    # Color palette for different classes
    colors = [
        "#FF3B30", "#FF9500", "#FFCC00", "#4CD964",
        "#5AC8FA", "#007AFF", "#5856D6", "#FF2D55"
    ]

    for i, result in enumerate(json_results):
        bbox = result["bbox"]
        label = str(result.get("label_text", f"Object {i}"))
        score = result.get("score", 0)

        # Convert percentage to absolute pixel coordinates
        x = (bbox["x"] / 100) * img_width
        y = (bbox["y"] / 100) * img_height
        width = (bbox["width"] / 100) * img_width
        height = (bbox["height"] / 100) * img_height

        # Select color based on detection index (wraps around the palette)
        color = colors[i % len(colors)]

        # BUG FIX: labels come from user text prompts and were interpolated
        # into the markup raw, so a label containing &, < or " produced
        # malformed SVG. Escape text content and quote attribute values.
        safe_label = escape(label)

        svg_content += f'''
    <rect
        x="{x:.2f}"
        y="{y:.2f}"
        width="{width:.2f}"
        height="{height:.2f}"
        stroke="{color}"
        stroke-width="2"
        fill="none"
        data-label={quoteattr(label)}
        data-score="{score:.2f}"
    />
    <text
        x="{x:.2f}"
        y="{y - 5:.2f}"
        font-family="Arial"
        font-size="12"
        fill="{color}"
    >{safe_label} ({score:.2f})</text>'''

    svg_footer = "\n</svg>"
    return svg_header + svg_content + svg_footer
560
def create_svg_from_segmentation(json_results, img_width, img_height):
    """Convert segmentation results to an SVG overlay string.

    Args:
        json_results: list of dicts, each with a "bbox" dict of percentage
            coordinates, an optional "polygon" list of {"x", "y"} percentage
            points, and optional "label_text" / "score" keys.
        img_width: target image width in pixels.
        img_height: target image height in pixels.

    Returns:
        A complete ``<svg>`` document string with a filled polygon (when
        available) plus a dashed, labelled bounding box per result.
    """
    # Local stdlib import so the file-level import block stays untouched.
    from xml.sax.saxutils import escape, quoteattr

    svg_header = f'<svg width="{img_width}" height="{img_height}" xmlns="http://www.w3.org/2000/svg">'
    svg_content = ""

    # Color palette for different classes
    colors = [
        "#FF3B30", "#FF9500", "#FFCC00", "#4CD964",
        "#5AC8FA", "#007AFF", "#5856D6", "#FF2D55"
    ]

    for i, result in enumerate(json_results):
        label = str(result.get("label_text", f"Object {i}"))
        score = result.get("score", 0)

        # Select color based on result index (wraps around the palette)
        color = colors[i % len(colors)]

        # BUG FIX: labels are model/user supplied and were interpolated into
        # the markup raw, so a label containing &, < or " produced malformed
        # SVG. Escape text content and quote attribute values.
        safe_label = escape(label)

        # Create polygon if available
        if "polygon" in result:
            points_str = " ".join(
                f"{(p['x'] / 100) * img_width:.2f},{(p['y'] / 100) * img_height:.2f}"
                for p in result["polygon"]
            )

            # "#RRGGBB33" appends a ~20% alpha channel to the fill color
            svg_content += f'''
    <polygon
        points="{points_str}"
        stroke="{color}"
        stroke-width="2"
        fill="{color}33"
        data-label={quoteattr(label)}
        data-score="{score:.2f}"
    />'''

        # Also add bounding box (assumes "bbox" is always present, matching
        # the detection output schema — TODO confirm against the detector)
        bbox = result["bbox"]
        x = (bbox["x"] / 100) * img_width
        y = (bbox["y"] / 100) * img_height
        width = (bbox["width"] / 100) * img_width
        height = (bbox["height"] / 100) * img_height

        svg_content += f'''
    <rect
        x="{x:.2f}"
        y="{y:.2f}"
        width="{width:.2f}"
        height="{height:.2f}"
        stroke="{color}"
        stroke-width="1"
        fill="none"
        stroke-dasharray="5,5"
    />
    <text
        x="{x:.2f}"
        y="{y - 5:.2f}"
        font-family="Arial"
        font-size="12"
        fill="{color}"
    >{safe_label} ({score:.2f})</text>'''

    svg_footer = "\n</svg>"
    return svg_header + svg_content + svg_footer
624
  def detection_inference(image, text_prompt, confidence, model_size):
625
  # Update model if needed
626
  detector.change_model(model_size)
 
632
  confidence_threshold=confidence
633
  )
634
 
635
+ # Create SVG from detection results
636
+ if isinstance(json_results, list) and len(json_results) > 0:
637
+ img_height, img_width = result_image.shape[:2]
638
+ svg_output = create_svg_from_detections(json_results, img_width, img_height)
639
+ else:
640
+ svg_output = "<svg></svg>"
641
+
642
+ return result_image, str(json_results), svg_output
643
 
644
  def segmentation_inference(image, confidence, model_name):
645
  # Run segmentation
 
649
  confidence_threshold=confidence
650
  )
651
 
652
+ # Create SVG from segmentation results
653
+ if isinstance(json_results, list) and len(json_results) > 0:
654
+ img_height, img_width = result_image.shape[:2]
655
+ svg_output = create_svg_from_segmentation(json_results, img_width, img_height)
656
+ else:
657
+ svg_output = "<svg></svg>"
658
+
659
+ return result_image, str(json_results), svg_output
660
 
661
  # Create Gradio interface
662
  with gr.Blocks(title="YOLO Vision Suite", css=custom_css) as demo:
 
667
 
668
  with gr.Tabs(elem_classes="tab-nav") as tabs:
669
  with gr.TabItem("Object Detection", elem_id="detection-tab"):
670
+ with gr.Row(equal_height=True):
671
+ with gr.Column(elem_classes="input-panel", scale=1):
672
  gr.Markdown("### Input")
673
+ input_image = gr.Image(label="Upload Image", type="numpy", height=300)
674
  text_prompt = gr.Textbox(
675
  label="Text Prompt",
676
  placeholder="person, car, dog",
 
693
  )
694
  detect_button = gr.Button("Detect Objects", elem_classes="gr-button-primary")
695
 
696
+ with gr.Column(elem_classes="output-panel", scale=1):
697
  gr.Markdown("### Results")
698
+ output_image = gr.Image(label="Detection Result", height=300)
699
+ with gr.Accordion("SVG Output", open=False, elem_classes="gr-accordion"):
700
+ svg_output = gr.HTML(label="SVG Visualization")
701
+ with gr.Accordion("JSON Output", open=False, elem_classes="gr-accordion"):
702
  json_output = gr.Textbox(
703
  label="Bounding Box Data (Percentage Coordinates)",
704
+ elem_classes="gr-input",
705
+ lines=5
706
  )
707
 
708
  with gr.TabItem("Segmentation", elem_id="segmentation-tab"):
709
+ with gr.Row(equal_height=True):
710
+ with gr.Column(elem_classes="input-panel", scale=1):
711
  gr.Markdown("### Input")
712
+ seg_input_image = gr.Image(label="Upload Image", type="numpy", height=300)
713
  with gr.Row():
714
  seg_confidence = gr.Slider(
715
  minimum=0.1,
 
726
  )
727
  segment_button = gr.Button("Segment Image", elem_classes="gr-button-primary")
728
 
729
+ with gr.Column(elem_classes="output-panel", scale=1):
730
  gr.Markdown("### Results")
731
+ seg_output_image = gr.Image(label="Segmentation Result", height=300)
732
+ with gr.Accordion("SVG Output", open=False, elem_classes="gr-accordion"):
733
+ seg_svg_output = gr.HTML(label="SVG Visualization")
734
+ with gr.Accordion("JSON Output", open=False, elem_classes="gr-accordion"):
735
  seg_json_output = gr.Textbox(
736
  label="Segmentation Data (Percentage Coordinates)",
737
+ elem_classes="gr-input",
738
+ lines=5
739
  )
740
 
741
  with gr.Column(elem_classes="footer"):
742
+ with gr.Column(elem_classes="footer-card"):
743
+ gr.Markdown("### Tips & Information")
744
+ with gr.Row(elem_classes="tips-grid"):
745
+ with gr.Column(elem_classes="tip-card"):
746
+ gr.Markdown("**Detection**")
747
+ gr.Markdown("Enter comma-separated text prompts to specify what objects to detect")
748
+
749
+ with gr.Column(elem_classes="tip-card"):
750
+ gr.Markdown("**Segmentation**")
751
+ gr.Markdown("The model will identify and segment common objects automatically")
752
+
753
+ with gr.Column(elem_classes="tip-card"):
754
+ gr.Markdown("**Models**")
755
+ gr.Markdown("Larger models provide better accuracy but require more processing power")
756
+
757
+ with gr.Column(elem_classes="tip-card"):
758
+ gr.Markdown("**Output**")
759
+ gr.Markdown("JSON output provides coordinates as percentages, compatible with SVG")
760
 
761
  # Set up event handlers
762
  detect_button.click(
763
  detection_inference,
764
  inputs=[input_image, text_prompt, confidence, model_dropdown],
765
+ outputs=[output_image, json_output, svg_output]
766
  )
767
 
768
  segment_button.click(
769
  segmentation_inference,
770
  inputs=[seg_input_image, seg_confidence, seg_model_dropdown],
771
+ outputs=[seg_output_image, seg_json_output, seg_svg_output]
772
  )
773
 
774
  if __name__ == "__main__":