Update app.py
app.py (CHANGED)
The removed lines, hunk by hunk:

@@ -127,10 +127,10 @@ custom_css = """
 
 # Available model sizes
 DETECTION_MODELS = {
-    "
-    "
-    "
-    "
 }
 
 SEGMENTATION_MODELS = {
@@ -147,11 +147,24 @@ class YOLOWorldDetector:
         self.model_name = DETECTION_MODELS[model_size]
 
         print(f"Loading {self.model_name} on {self.device}...")
-
-
-
-
-
 
         # Segmentation models
         self.seg_models = {}
@@ -162,13 +175,18 @@ class YOLOWorldDetector:
         self.model_name = DETECTION_MODELS[model_size]
 
         print(f"Loading {self.model_name} on {self.device}...")
-
-
-
-
-
         return f"Using {self.model_name} model"
-
     def load_seg_model(self, model_name):
         if model_name not in self.seg_models:
             print(f"Loading segmentation model {model_name}...")
@@ -180,75 +198,48 @@ class YOLOWorldDetector:
         if image is None:
             return None, "No image provided"
 
         if isinstance(image, str):
-
         elif isinstance(image, np.ndarray):
-
-
-        # Process inputs
-        inputs = self.processor(text=text_prompt, images=image, return_tensors="pt")
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
         # Run inference
-
-
-
-
-
-
-
-            target_sizes=target_sizes,
-            threshold=confidence_threshold
-        )[0]
-
-        # Convert image to numpy for drawing
-        image_np = np.array(image)
-
-        # Draw bounding boxes
-        for box, score, label in zip(results["boxes"], results["scores"], results["labels"]):
-            box = box.cpu().numpy().astype(int)
-            score = score.cpu().item()
-            label = label.cpu().item()
-
-            # Get class name from model's config
-            class_name = f"{text_prompt.split(',')[label] if label < len(text_prompt.split(',')) else 'Object'}: {score:.2f}"
-
-            # Draw rectangle
-            cv2.rectangle(
-                image_np,
-                (box[0], box[1]),
-                (box[2], box[3]),
-                (0, 255, 0),
-                2
-            )
-
-            # Draw label background
-            text_size = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
-            cv2.rectangle(
-                image_np,
-                (box[0], box[1] - text_size[1] - 5),
-                (box[0] + text_size[0], box[1]),
-                (0, 255, 0),
-                -1
             )
-
-            #
-
-
-
-
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5,
-                (0, 0, 0),
-                2
             )
 
         # Convert results to JSON format (percentages)
         json_results = []
-        img_height, img_width =
 
-        for box,
-
             x1, y1, x2, y2 = box
 
             json_results.append({
@@ -258,12 +249,12 @@ class YOLOWorldDetector:
                     "width": ((x2 - x1) / img_width) * 100,
                     "height": ((y2 - y1) / img_height) * 100
                 },
-                "score": float(
-                "label": int(
-                "label_text":
             })
 
-        return
 
     def segment(self, image, model_name, confidence_threshold=0.3):
         if image is None:
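The removed detection path follows the Hugging Face Transformers zero-shot detection pattern: the processor builds joint text-and-image inputs, the model runs, and the processor's post-processing rescales boxes with target_sizes and filters them with threshold before the cv2 drawing loop. The checkpoint it loaded is not visible in this diff, so the following is only a minimal sketch of that pattern with OwlViT as a stand-in; the model id, image path, and prompts are illustrative assumptions, not taken from app.py.

# Sketch only: OwlViT stands in for whichever checkpoint app.py actually used.
import torch
from PIL import Image
from transformers import OwlViTForObjectDetection, OwlViTProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)

image = Image.open("example.jpg")            # illustrative input image
text_prompt = "person,dog,bicycle"           # comma-separated prompts, as in app.py
labels = [p.strip() for p in text_prompt.split(",")]

inputs = processor(text=[labels], images=image, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model(**inputs)

# Rescale boxes to the original image size and drop low-confidence hits,
# mirroring the deleted call that passed target_sizes= and threshold=.
target_sizes = torch.tensor([image.size[::-1]], device=device)  # (height, width)
results = processor.post_process_object_detection(
    outputs=outputs,
    target_sizes=target_sizes,
    threshold=0.3,
)[0]

for box, score, label in zip(results["boxes"], results["scores"], results["labels"]):
    print(labels[int(label)], float(score), [int(v) for v in box.tolist()])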
The corresponding added lines, hunk by hunk:

@@ -127,10 +127,10 @@ custom_css = """
 
 # Available model sizes
 DETECTION_MODELS = {
+    "small": "yolov8s-worldv2.pt",
+    "medium": "yolov8m-worldv2.pt",
+    "large": "yolov8l-worldv2.pt",
+    "xlarge": "yolov8x-worldv2.pt",
 }
 
 SEGMENTATION_MODELS = {
@@ -147,11 +147,24 @@ class YOLOWorldDetector:
         self.model_name = DETECTION_MODELS[model_size]
 
         print(f"Loading {self.model_name} on {self.device}...")
+        try:
+            # Use the correct repository ID
+            self.model = AutoModel.from_pretrained(
+                f"IDEA-Research/{self.model_name}",
+                trust_remote_code=True
+            )
+            self.model.to(self.device)
+            self.processor = AutoProcessor.from_pretrained(
+                f"IDEA-Research/{self.model_name}"
+            )
+            print("Model loaded successfully!")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            print("Falling back to YOLOv8 for detection...")
+            # Fallback to YOLOv8 if YOLOWorld fails to load
+            self.model = None
+            self.processor = None
+            self.fallback_model = YOLO("yolov8n.pt")
 
         # Segmentation models
         self.seg_models = {}
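The new constructor wraps model loading in try/except and keeps a plain YOLOv8 model around as a fallback. Below is a minimal sketch of that load-with-fallback pattern factored into a standalone helper; load_detector is a hypothetical name, and the IDEA-Research repository id simply mirrors the string built in the hunk above.

# Sketch of the load-with-fallback pattern, factored out of the constructor.
# `load_detector` is a hypothetical helper, not part of app.py.
from transformers import AutoModel, AutoProcessor
from ultralytics import YOLO


def load_detector(model_name, device="cpu"):
    """Try the primary checkpoint first; fall back to a plain YOLOv8 model."""
    try:
        model = AutoModel.from_pretrained(
            f"IDEA-Research/{model_name}", trust_remote_code=True
        ).to(device)
        processor = AutoProcessor.from_pretrained(f"IDEA-Research/{model_name}")
        return model, processor, None
    except Exception as exc:
        print(f"Error loading model: {exc}")
        print("Falling back to YOLOv8 for detection...")
        return None, None, YOLO("yolov8n.pt")

Returning the fallback explicitly, instead of stashing it on self, keeps the None checks in one place for callers.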
@@ -162,13 +175,18 @@ class YOLOWorldDetector:
         self.model_name = DETECTION_MODELS[model_size]
 
         print(f"Loading {self.model_name} on {self.device}...")
+        try:
+            # Use Ultralytics YOLOWorld model
+            from ultralytics import YOLOWorld
+            self.model = YOLOWorld(self.model_name)
+            print("Model loaded successfully!")
+        except Exception as e:
+            print(f"Error loading YOLOWorld model: {e}")
+            print("Falling back to standard YOLOv8 for detection...")
+            # Fallback to YOLOv8 if YOLOWorld fails to load
+            self.model = YOLO("yolov8n.pt")
         return f"Using {self.model_name} model"
+
     def load_seg_model(self, model_name):
         if model_name not in self.seg_models:
             print(f"Loading segmentation model {model_name}...")
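For reference, the Ultralytics YOLO-World API usually receives its open-vocabulary prompts through set_classes() on the model before predict() is called. A minimal sketch of that workflow, assuming the yolov8s-worldv2.pt checkpoint from DETECTION_MODELS and an illustrative prompt and image path:

# Minimal sketch of the Ultralytics YOLO-World text-prompt workflow.
# Assumptions: yolov8s-worldv2.pt from DETECTION_MODELS; prompt and file names are illustrative.
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-worldv2.pt")

# Open-vocabulary classes are set on the model itself, then reused by predict().
text_prompt = "person, dog, bicycle"
model.set_classes([c.strip() for c in text_prompt.split(",")])

results = model.predict(source="example.jpg", conf=0.3, verbose=False)
for box, cls, conf in zip(
    results[0].boxes.xyxy.cpu().numpy(),
    results[0].boxes.cls.cpu().numpy(),
    results[0].boxes.conf.cpu().numpy(),
):
    print(results[0].names[int(cls)], float(conf), box.tolist())

The same results[0].boxes.xyxy / .cls / .conf tensors and the results[0].names mapping are what the detection path below converts into percentage-based JSON.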
@@ -180,75 +198,48 @@ class YOLOWorldDetector:
         if image is None:
             return None, "No image provided"
 
+        try:
+            # Check if we're using YOLOWorld or standard YOLO
+            from ultralytics import YOLOWorld
+            is_yoloworld = isinstance(self.model, YOLOWorld)
+        except:
+            is_yoloworld = False
+
+        # Process the image
         if isinstance(image, str):
+            img_for_json = cv2.imread(image)
         elif isinstance(image, np.ndarray):
+            img_for_json = image.copy()
 
         # Run inference
+        if is_yoloworld:
+            # YOLOWorld supports text prompts
+            results = self.model.predict(
+                source=image,
+                classes=text_prompt.split(','),
+                conf=confidence_threshold,
+                verbose=False
             )
+        else:
+            # Standard YOLO doesn't use text prompts
+            results = self.model.predict(
+                source=image,
+                conf=confidence_threshold,
+                verbose=False
             )
 
+        # Get the plotted result
+        res_plotted = results[0].plot()
+
         # Convert results to JSON format (percentages)
         json_results = []
+        img_height, img_width = img_for_json.shape[:2]
 
+        for i, (box, cls, conf) in enumerate(zip(
+            results[0].boxes.xyxy.cpu().numpy(),
+            results[0].boxes.cls.cpu().numpy(),
+            results[0].boxes.conf.cpu().numpy()
+        )):
             x1, y1, x2, y2 = box
 
             json_results.append({
@@ -258,12 +249,12 @@ class YOLOWorldDetector:
                     "width": ((x2 - x1) / img_width) * 100,
                     "height": ((y2 - y1) / img_height) * 100
                 },
+                "score": float(conf),
+                "label": int(cls),
+                "label_text": results[0].names[int(cls)]
             })
 
+        return res_plotted, json_results
 
     def segment(self, image, model_name, confidence_threshold=0.3):
        if image is None:
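A short sketch of how a caller might consume the (annotated image, JSON results) pair this method now returns. The detect method name, the constructor argument, and the file names are assumptions for illustration; only the class name, the parameter names, and the returned fields come from the diff.

# Sketch only: `detect` and the constructor argument are assumed, not shown in the diff.
import cv2

from app import YOLOWorldDetector  # this Space's app.py

detector = YOLOWorldDetector("small")          # assumed: a key of DETECTION_MODELS
annotated, detections = detector.detect(       # assumed method name
    image="example.jpg",
    text_prompt="person,dog",
    confidence_threshold=0.3,
)

# results[0].plot() yields a BGR numpy array, so cv2.imwrite can save it directly.
cv2.imwrite("annotated.jpg", annotated)

# Box coordinates are percentages of the image size, so they survive resizing.
for det in detections:
    print(det["label_text"], round(det["score"], 2))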