Create app.py
app.py
ADDED
from io import BytesIO
import os
import tempfile

from PIL import Image, ImageDraw, ImageFont
import numpy as np
import cv2

import gradio as gr
import requests

# from torchmetrics.detection.mean_ap import MeanAveragePrecision
# from torchmetrics.detection.iou import IntersectionOverUnion
# import evaluate
# from datasets import load_metric

from transformers import pipeline
from huggingface_hub import hf_hub_download

from safetensors.torch import load_file

# Utilities
PALETTE = {0: {"color": (255, 0, 0),
               "name": "Ambulance"},
           1: {"color": (0, 191, 0),
               "name": "Firetruck"},
           2: {"color": (0, 0, 255),
               "name": "Police"},
           3: {"color": (255, 0, 255),
               "name": "Non-EV"}}
label2id = {val["name"]: id for (id, val) in PALETTE.items()}
id2label = {id: name for (name, id) in label2id.items()}

print(label2id)
print(id2label)

def unnormalize_bbox(img_h, img_w, bbox):
    """Convert a normalized (cx, cy, w, h) box into absolute (x_min, y_min, x_max, y_max) pixels."""
    x_min = bbox[0] - bbox[2] / 2
    y_min = bbox[1] - bbox[3] / 2
    x_max = bbox[0] + bbox[2] / 2
    y_max = bbox[1] + bbox[3] / 2

    # Scale the normalized coordinates by the image size
    x_min *= img_w
    y_min *= img_h
    x_max *= img_w
    y_max *= img_h
    x_min, y_min, x_max, y_max = map(int, (x_min, y_min, x_max, y_max))

    return (x_min, y_min, x_max, y_max)

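# Quick sanity check (illustrative values, not from the dataset): a box centered at
# (0.5, 0.5) covering half of a 100x200 image in each dimension.
assert unnormalize_bbox(100, 200, (0.5, 0.5, 0.5, 0.5)) == (50, 25, 150, 75)
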
def paint_bbox(
    image,
    annotations,
    normalize_labels=True,
    normalize_bbox=True,
):
    """Draw labeled bounding boxes from an annotation dict onto a NumPy image (H, W, C)."""
    bboxes = annotations["boxes"].tolist()
    class_ids = annotations["labels"].tolist()
    confidences = annotations["scores"].tolist()

    painted_img = image.copy()  # Draw on a copy so the caller's image is left untouched
    for (bbox, label, confidence) in zip(bboxes, class_ids, confidences):
        label = (label - 1) if normalize_labels else label
        if normalize_bbox:
            img_h, img_w = image.shape[0], image.shape[1]  # H, W, C
            x_min, y_min, x_max, y_max = unnormalize_bbox(img_h, img_w, bbox)
        else:
            x_min, y_min, x_max, y_max = map(int, bbox)

        box_color = PALETTE[label]["color"]
        label_name = PALETTE[label]["name"]

        if confidence != -1:
            label_name = f"{label_name} ({confidence:.2f})"

        # Box outline
        cv2.rectangle(painted_img,
                      (x_min, y_min),
                      (x_max, y_max),
                      color=box_color,
                      thickness=2)
        # Filled label background, sized roughly to the text length
        cv2.rectangle(painted_img,
                      (x_min, y_min),
                      (x_min + 5 + len(label_name) * 10, y_min + 17),
                      color=box_color,
                      thickness=-1)
        cv2.putText(painted_img,
                    label_name,
                    (x_min + 2, y_min + 12),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.5,
                    color=(255, 255, 255),
                    thickness=1)
    return painted_img

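# Illustrative usage (hypothetical values): ground-truth style annotations with a
# score of -1 so no confidence text is drawn. Any array-like with .tolist() works.
# ann = {"boxes": np.array([[0.5, 0.5, 0.4, 0.3]]),   # normalized (cx, cy, w, h)
#        "labels": np.array([0]),                      # 0 -> Ambulance
#        "scores": np.array([-1])}
# painted = paint_bbox(frame, ann, normalize_labels=False)
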
# Function to calculate Intersection over Union (IoU)
def calculate_iou(truth_bbx, pred_bbx):
    # Coordinates of the boxes: [xmin, ymin, xmax, ymax]
    x1, y1, x2, y2 = truth_bbx
    x1_p, y1_p, x2_p, y2_p = pred_bbx

    # Calculate the intersection rectangle
    ixmin = max(x1, x1_p)
    iymin = max(y1, y1_p)
    ixmax = min(x2, x2_p)
    iymax = min(y2, y2_p)

    iw = max(0, ixmax - ixmin)
    ih = max(0, iymax - iymin)

    intersection = iw * ih
    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_p - x1_p) * (y2_p - y1_p)
    union = area1 + area2 - intersection
    iou = intersection / union if union != 0 else 0
    return iou

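# Quick sanity check (illustrative boxes): two 2x2 boxes offset by one unit overlap
# in a 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7.
assert abs(calculate_iou([0, 0, 2, 2], [1, 1, 3, 3]) - 1 / 7) < 1e-9
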
# Load the fine-tuned detector once at startup, from the model hosted on the Hugging Face Hub
# (e.g. emotion_classifier = pipeline("image-classification", model="itsindrabudhik/emotion_classification"))
DETECTOR = pipeline("object-detection", model="itsindrabudhik/finalProjectCV2425")
tensor_file = hf_hub_download(repo_id="itsindrabudhik/finalProjectCV2425",
                              filename="model.safetensors")

# Manually assign the classification-head weights, since the pipeline does not seem to load them
# weights = load_file(tensor_file)
# DETECTOR.model.class_labels_classifier.weight.data = weights["class_labels_classifier.weight"]
# DETECTOR.model.class_labels_classifier.bias.data = weights["class_labels_classifier.bias"]
# del weights

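# Each pipeline result is a dict of the form
# {'score': float, 'label': str, 'box': {'xmin': int, 'ymin': int, 'xmax': int, 'ymax': int}}
# e.g. (illustrative values, not real model output):
# [{'score': 0.97, 'label': 'Ambulance',
#   'box': {'xmin': 34, 'ymin': 50, 'xmax': 210, 'ymax': 180}}]
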
def detect_ev_nev(image, confidence_threshold=0.5, iou_threshold=0.5):
    # Run the detector pipeline on the image (accepts a path, URL, or PIL image)
    results = DETECTOR(image)

    # Open the image
    if isinstance(image, str):  # If the image is a URL or file path
        if image.startswith("http"):
            response = requests.get(image)
            img = Image.open(BytesIO(response.content))
        else:
            img = Image.open(image)
    else:
        img = image

    # Prepare a font for the labels, falling back to PIL's default font
    # if the DejaVuSans file bundled with OpenCV is not available
    try:
        font_path = os.path.join(cv2.__path__[0], 'qt', 'fonts', 'DejaVuSans.ttf')
        font = ImageFont.truetype(font_path, size=32)
    except OSError:
        font = ImageFont.load_default()
    draw = ImageDraw.Draw(img)

    details = []  # Collect details for text output
    for result in results:
        score = result['score']
        label = result['label']
        box = result['box']

        # Apply confidence threshold
        if score < confidence_threshold:
            continue

        # Suppress this detection if it overlaps a higher-scoring one, so only
        # the best box of each overlapping cluster is kept
        keep = True
        for other in results:
            if other is result or other['score'] <= score:
                continue
            other_box = other['box']
            iou = calculate_iou([box['xmin'], box['ymin'], box['xmax'], box['ymax']],
                                [other_box['xmin'], other_box['ymin'], other_box['xmax'], other_box['ymax']])
            if iou > iou_threshold:
                keep = False
                break

        if keep:
            # Draw the bounding box and label in the class's palette color
            label_color = PALETTE[label2id[label]]["color"]
            xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
            draw.rectangle([xmin, ymin, xmax, ymax], outline=label_color, width=3)

            text = f"{label} ({score:.2f})"

            # Measure the text so it can be placed just above the box
            text_bbox = draw.textbbox((xmin, ymin - 10), text, font=font)  # (xmin, ymin, xmax, ymax)
            text_height = text_bbox[3] - text_bbox[1]

            draw.text((xmin, ymin - text_height - 5), text, fill=label_color, font=font)

            # Add details to the list
            details.append({
                "Label": label,
                "Confidence": f"{score:.2f}",
                "Bounding Box": f"({xmin}, {ymin}, {xmax}, {ymax})"
            })
    details_text = "\n".join([f"Label: {d['Label']}, Confidence: {d['Confidence']}, Box: {d['Bounding Box']}" for d in details])
    return img, details_text

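# Example call (hypothetical URL):
# annotated, text = detect_ev_nev("https://example.com/street.jpg",
#                                 confidence_threshold=0.6, iou_threshold=0.4)
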
def detect_video(video, confidence_threshold=0.5, iou_threshold=0.5):
    video_capture = cv2.VideoCapture(video)
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Write the annotated frames to a temporary MP4 file
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(temp_output.name, fourcc, fps, (frame_width, frame_height))

    details = []
    total_frames = 0
    detected_frames = 0

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        total_frames += 1
        # OpenCV decodes frames as BGR; convert to RGB for PIL
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        annotated_image, frame_details = detect_ev_nev(image, confidence_threshold, iou_threshold)

        # Count frames with detections
        if frame_details.strip():  # Non-empty details indicate detections
            detected_frames += 1

        details.append(frame_details)
        annotated_frame = cv2.cvtColor(np.array(annotated_image), cv2.COLOR_RGB2BGR)
        out.write(annotated_frame)

    video_capture.release()
    out.release()

    details_text = "\n".join(details)
    summary = f"Total Frames: {total_frames}, Frames with Detections: {detected_frames}\n" + details_text
    return temp_output.name, summary

def detect(file, confidence_threshold=0.5, iou_threshold=0.5):
    # gr.File yields a temp-file object; its .name attribute is the path on disk
    file_path = file.name
    # Determine from the extension whether the input is an image or a video
    file_ext = file_path.split(".")[-1].lower()
    if file_ext in ["png", "jpg", "jpeg"]:
        # Image processing
        annotated_image, details = detect_ev_nev(file_path, confidence_threshold, iou_threshold)
        return annotated_image, None, details
    elif file_ext in ["mp4", "avi", "mov"]:
        # Video processing
        processed_video, details = detect_video(file_path, confidence_threshold, iou_threshold)
        return None, processed_video, details
    else:
        raise ValueError("Unsupported file format. Please upload an image or video.")

interface = gr.Interface(
    fn=detect,
    inputs=[
        gr.File(label="Upload Image or Video", file_types=[".png", ".jpg", ".jpeg", ".mp4", ".avi", ".mov"]),
        gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
        gr.Slider(0, 1, value=0.5, label="IoU Threshold"),
    ],
    outputs=[
        gr.Image(label="Processed Image"),
        gr.Video(label="Generated Video"),
        gr.Text(label="Detection Details")
    ],
    title="RT-DETR Object Detection for Images and Videos",
    description="Upload an image or video to detect objects using the fine-tuned RT-DETR model. Results include the annotated image/video and detection details."
)
interface.launch(debug=True)