Spaces:
Sleeping
Sleeping
File size: 3,203 Bytes
894a0e9 eee0e91 894a0e9 54e2701 eee0e91 894a0e9 54e2701 eee0e91 54e2701 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 54e2701 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 54e2701 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 54e2701 eee0e91 54e2701 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 eee0e91 894a0e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import cv2
import torch
from PIL import Image, ImageDraw
import gradio as gr
import pandas as pd
from transformers import pipeline
# Object detector: the 'yolov5s' entry point of the ultralytics/yolov5 hub repo.
# Loaded once at import time so every request reuses the same instance.
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')

# English -> Arabic translation pipeline used to localise detected class names.
translator = pipeline(
    task="translation_en_to_ar",
    model="Helsinki-NLP/opus-mt-en-ar",
)
def detect_and_draw_image(input_image):
    """Detect objects in an image, draw their boxes, and tabulate the counts.

    Args:
        input_image: PIL image supplied by the Gradio image input
            (``None`` when the user submits without uploading anything).

    Returns:
        A ``(annotated_image, counts_table)`` tuple. ``annotated_image`` is a
        copy of the input with one red box and a ``label: confidence`` caption
        per detection. ``counts_table`` is a DataFrame with the English label,
        its Arabic translation and the number of detections of that label.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    results = model(input_image)
    # .cpu() is a no-op on CPU but required before .numpy() when the hub
    # model was placed on a CUDA device.
    detections = results.xyxy[0].cpu().numpy()

    # Draw on a copy so the caller's image object is left untouched.
    annotated = input_image.copy()
    draw = ImageDraw.Draw(annotated)

    counts = {}
    for detection in detections:
        xmin, ymin, xmax, ymax, conf, class_id = detection
        label = model.names[int(class_id)]
        counts[label] = counts.get(label, 0) + 1
        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
        draw.text((xmin, ymin), f"{label}: {conf:.2f}", fill="white")

    labels = list(counts)
    # Skip the translation model entirely when nothing was detected; there is
    # nothing to translate and the pipeline is not meant for empty batches.
    translated_labels = translator(labels) if labels else []

    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(counts.values()),
    })
    return annotated, df
def detect_and_draw_video(video_path):
    """Run object detection on every frame of a video and write an annotated copy.

    Each frame is resized to 640x480, passed through the detector, annotated
    with boxes and ``label: confidence`` captions, and written straight to the
    output file, so memory use does not grow with the video length.

    Args:
        video_path: Filesystem path of the uploaded video (``None`` when the
            user submits without uploading anything).

    Returns:
        A ``(output_path, counts_table)`` tuple. ``output_path`` is the path of
        the annotated MP4. ``counts_table`` is a DataFrame with the English
        label, its Arabic translation and the detection count. Counts are
        summed over all frames, so an object visible in N frames counts N times.

    Raises:
        gr.Error: If no video was provided or it cannot be opened.
    """
    import tempfile  # local import: only this function needs it

    if not video_path:
        raise gr.Error("Please upload a video first.")

    frame_size = (640, 480)
    fallback_fps = 20.0

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Could not open the uploaded video.")

    # Keep the source frame rate so the output plays at the original speed.
    # `not fps > 0` also catches 0, negative and NaN values some containers report.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = fallback_fps

    # A unique file per request prevents concurrent users from overwriting
    # each other's results.
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
        output_path = tmp.name

    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
    overall_counts = {}
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, frame_size)

            # OpenCV decodes frames as BGR, while the detector expects RGB
            # for NumPy inputs.
            results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # .cpu() is required before .numpy() when the model runs on CUDA.
            detections = results.xyxy[0].cpu().numpy()

            for detection in detections:
                xmin, ymin, xmax, ymax, conf, class_id = detection
                label = model.names[int(class_id)]
                overall_counts[label] = overall_counts.get(label, 0) + 1

                left, top = int(xmin), int(ymin)
                # (0, 0, 255) is red in BGR, matching the red boxes drawn on
                # the image tab.
                cv2.rectangle(frame, (left, top), (int(xmax), int(ymax)), (0, 0, 255), 2)
                # Clamp the caption so it stays visible for boxes touching
                # the top edge of the frame.
                cv2.putText(frame, f"{label}: {conf:.2f}", (left, max(top - 10, 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            out.write(frame)
    finally:
        # Release both handles even if detection fails midway.
        cap.release()
        out.release()

    labels = list(overall_counts)
    # Nothing to translate when no object was detected in any frame.
    translated_labels = translator(labels) if labels else []

    df = pd.DataFrame({
        'Label (English)': labels,
        'Label (Arabic)': [t['translation_text'] for t in translated_labels],
        'Object Count': list(overall_counts.values()),
    })
    return output_path, df
# --- Gradio UI ---------------------------------------------------------------

# Image tab: a PIL image goes in; the annotated image and a counts table come out.
image_interface = gr.Interface(
    detect_and_draw_image,
    gr.Image(type="pil", label="Upload Image"),
    [gr.Image(type="pil"), gr.Dataframe(label="Object Counts")],
    title="Object Detection for Images",
    description="Upload an image to see the objects detected and their counts.",
)

# Video tab: an uploaded video goes in; the processed video and a counts table come out.
video_interface = gr.Interface(
    detect_and_draw_video,
    gr.Video(label="Upload Video"),
    [gr.Video(label="Processed Video"), gr.Dataframe(label="Object Counts")],
    title="Object Detection for Videos",
    description="Upload a video to see the objects detected and their counts.",
)

# Combine both interfaces into a single app with one tab each, then start the server.
app = gr.TabbedInterface(
    interface_list=[image_interface, video_interface],
    tab_names=["Image Detection", "Video Detection"],
)
app.launch(debug=True)
|