import gradio as gr
import yolov7
import subprocess
import tempfile
import time
from pathlib import Path
import cv2
def image_fn(
    image=None,
    model_path=None,
    image_size=640,
    conf_threshold=0.25,
    iou_threshold=0.45,
):
    """
    YOLOv7 inference on a single image.

    Args:
        image: Input image (PIL.Image)
        model_path: Hugging Face Hub model id or local path to the weights
        image_size: Inference image size in pixels
        conf_threshold: Confidence threshold for keeping detections
        iou_threshold: IoU threshold for non-maximum suppression
    Returns:
        The input image with detections drawn (numpy array)
    """
model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
model.conf = conf_threshold
model.iou = iou_threshold
results = model([image], size=image_size)
return results.render()[0]
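

# A minimal local sanity check for image_fn (a hedged sketch: "sample.jpg" and
# "annotated.jpg" are hypothetical files, not shipped with this Space):
#
#     from PIL import Image
#     rendered = image_fn(Image.open("sample.jpg"),
#                         "Aalaa/Yolov7_Visual_Pollution_Detection")
#     Image.fromarray(rendered).save("annotated.jpg")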
def video_fn(video_file, model_path, conf_thres=0.25, iou_thres=0.45, start_sec=0, duration=2):
    # The interface below only supplies the video and the model, so the
    # remaining arguments fall back to the same defaults used for images.
    model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
    model.conf = conf_thres
    model.iou = iou_thres
    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
    duration_timestamp = time.strftime("%H:%M:%S", time.gmtime(duration))
    suffix = Path(video_file).suffix
    clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
    # With -ss placed before -i, ffmpeg seeks first and output timestamps
    # restart at zero, so -to must be the clip duration, not start + duration.
    subprocess.call(
        f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {duration_timestamp} -c copy {clip_temp_file.name}".split()
    )
# Reader of clip file
cap = cv2.VideoCapture(clip_temp_file.name)
# This is an intermediary temp file where we'll write the video to
# Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
# with ffmpeg at the end of the function here.
    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
        # Match the writer to the source clip; a VideoWriter with a hard-coded
        # size silently drops frames whose dimensions differ.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
        num_frames = 0
        max_frames = int(duration * fps)
while cap.isOpened():
try:
ret, frame = cap.read()
if not ret:
break
except Exception as e:
print(e)
continue
            # cv2 reads frames as BGR while the hub model expects RGB, so
            # convert before inference and back before writing.
            results = model([cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)])
            out.write(cv2.cvtColor(results.render()[0], cv2.COLOR_RGB2BGR))
            num_frames += 1
            print(f"Processed {num_frames} frames")
if num_frames == max_frames:
break
        cap.release()
        out.release()
        # Aforementioned hackiness: re-encode to H.264 so browsers can play it.
        out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
        subprocess.run(
            f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split()
        )
    return out_file.name
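

# Example call outside Gradio (a hedged sketch: "clip.mp4" is a hypothetical
# local file; the argument order mirrors the interface, video first, model id
# second):
#
#     out_path = video_fn(
#         "clip.mp4",
#         "Aalaa/Yolov7_Visual_Pollution_Detection",
#         conf_thres=0.25, iou_thres=0.45, start_sec=0, duration=2,
#     )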
image_interface = gr.Interface(
fn=image_fn,
inputs=[
gr.inputs.Image(type="pil", label="Input Image"),
gr.inputs.Dropdown(
choices=[
"Aalaa/Yolov7_Visual_Pollution_Detection",
],
default="Aalaa/Yolov7_Visual_Pollution_Detection",
label="Model",
)
#gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
#gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
#gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
],
    outputs=gr.outputs.Image(type="numpy", label="Output Image"),
#title="Visual P",
#examples=[['image1.jpg', 'Aalaa/Yolov7_Visual_Pollution_Detection', 640, 0.25, 0.45], ['image2.jpg', 'Aalaa/Yolov7_Visual_Pollution_Detection', 640, 0.25, 0.45], ['image3.jpg', 'Aalaa/Yolov7_Visual_Pollution_Detection', 640, 0.25, 0.45]],
#cache_examples=True,
#theme='huggingface',
)
video_interface = gr.Interface(
fn=video_fn,
inputs=[
        gr.inputs.Video(source="upload", type="mp4", label="Input Video"),
gr.inputs.Dropdown(
choices=[
"Aalaa/Yolov7_Visual_Pollution_Detection",
],
default="Aalaa/Yolov7_Visual_Pollution_Detection",
label="Model",
),
],
    outputs=gr.outputs.Video(type="mp4", label="Output Video"),
    # examples=[
    #     ["video.mp4", "Aalaa/Yolov7_Visual_Pollution_Detection", 0.25, 0.45, 0, 2],
    # ],
title="Smart Environmental Eye (SEE)",
    # cache_examples=True,
theme='huggingface',
)
if __name__ == "__main__":
gr.TabbedInterface(
[image_interface, video_interface],
["Run on Images", "Run on Videos"],
).launch()