Spaces:

Zengyf-CVer
/

Gradio_YOLOv5_Det_v4

Runtime error

App Files Files Community

Gradio_YOLOv5_Det_v4 / app.py

Zengyf-CVer

app update

108dfa3 over 2 years ago

raw

history blame contribute delete

23.5 kB

	# Gradio YOLOv5 Det v0.4
	# author: Zeng Yifu（曾逸夫）
	# creation time: 2022-05-28
	# email: [email protected]
	# project homepage: https://gitee.com/CV_Lab/gradio_yolov5_det

	import argparse
	import sys
	import csv
	csv.field_size_limit(sys.maxsize)

	import gc
	import json
	import os
	from collections import Counter
	from pathlib import Path

	import cv2
	import gradio as gr
	import numpy as np
	import pandas as pd
	import torch
	import yaml
	from PIL import Image, ImageDraw, ImageFont

	from util.fonts_opt import is_fonts
	from util.pdf_opt import pdf_generate

	# First entry of sys.path; the font files are looked up under f"{ROOT_PATH}/fonts".
	ROOT_PATH = sys.path[0] # root directory

	# Repository identifier handed to torch.hub.load by model_loading()
	model_path = "ultralytics/yolov5"

	# Gradio YOLOv5 Det version string (printed after a model loads, passed to pdf_generate)
	GYD_VERSION = "Gradio YOLOv5 Det v0.4"

	# Name of the model requested by the most recent detection call
	model_name_tmp = ""

	# Device requested by the most recent detection call
	device_tmp = ""

	# File extensions accepted by yaml_csv(): index 0 is the CSV format, index 1 the YAML format
	suffix_list = [".csv", ".yaml"]

	# Point size used for every label font
	FONTSIZE = 25

	# Keys of the object-size statistics, in small / medium / large order
	obj_style = ["Small Object", "Medium Object", "Large Object"]


	def parse_args(known=False, argv=None):
	    """Build the command-line parser and return the parsed options.

	    Args:
	        known: When True, unrecognised arguments are ignored
	            (``parse_known_args``) instead of ending with a usage error.
	        argv: Optional explicit list of argument strings. ``None`` (the
	            default) lets argparse read ``sys.argv[1:]``.

	    Returns:
	        argparse.Namespace with one attribute per option declared below.
	    """
	    parser = argparse.ArgumentParser(description="Gradio YOLOv5 Det v0.4")

	    # ---- input sources ----
	    parser.add_argument("--source", "-src", default="upload", type=str, help="input source")
	    parser.add_argument("--source_video", "-src_v", default="webcam", type=str, help="video input source")
	    parser.add_argument("--img_tool", "-it", default="editor", type=str, help="input image tool")

	    # ---- model selection ----
	    parser.add_argument("--model_name", "-mn", default="yolov5s", type=str, help="model name")
	    parser.add_argument(
	        "--model_cfg",
	        "-mc",
	        default="./model_config/model_name_p5_p6_all.yaml",
	        type=str,
	        help="model config",
	    )
	    parser.add_argument(
	        "--cls_name",
	        "-cls",
	        default="./cls_name/cls_name_en.yaml",
	        type=str,
	        help="cls name",
	    )

	    # ---- inference settings ----
	    parser.add_argument(
	        "--nms_conf",
	        "-conf",
	        default=0.5,
	        type=float,
	        help="model NMS confidence threshold",
	    )
	    parser.add_argument("--nms_iou", "-iou", default=0.45, type=float, help="model NMS IoU threshold")
	    parser.add_argument(
	        "--device",
	        "-dev",
	        default="cpu",
	        type=str,
	        help="cuda or cpu",
	    )
	    parser.add_argument("--inference_size", "-isz", default=640, type=int, help="model inference size")
	    # Kept as float for backward compatibility; the detection functions cast it with int().
	    parser.add_argument("--max_detnum", "-mdn", default=50, type=float, help="model max det num")
	    parser.add_argument("--slider_step", "-ss", default=0.05, type=float, help="slider step")

	    # ---- launch settings ----
	    parser.add_argument(
	        "--is_login",
	        "-isl",
	        action="store_true",
	        default=False,
	        help="is login",
	    )
	    parser.add_argument(
	        "--usr_pwd",
	        "-up",
	        nargs="+",
	        type=str,
	        default=["admin", "admin"],
	        help="user & password for login",
	    )
	    parser.add_argument(
	        "--is_share",
	        "-is",
	        action="store_true",
	        default=False,
	        help="is share",  # was a copy-paste of the --is_login help text
	    )

	    if known:
	        return parser.parse_known_args(argv)[0]
	    return parser.parse_args(argv)


	# yaml file parsing
	def yaml_parse(file_path):
	    """Read a UTF-8 YAML file and return the object produced by ``yaml.safe_load``.

	    Args:
	        file_path: Path of the YAML file to read.

	    Returns:
	        Whatever ``yaml.safe_load`` builds from the file content.
	    """
	    # The context manager closes the handle; the previous one-liner left it open.
	    with open(file_path, encoding="utf-8") as yaml_file:
	        return yaml.safe_load(yaml_file)


	# yaml / csv name-list parsing
	def yaml_csv(file_path, file_tag):
	    """Load a list of names from a CSV or YAML file, chosen by file extension.

	    Args:
	        file_path: Path of the file; its suffix must be one of ``suffix_list``.
	        file_tag: Key looked up in the parsed YAML document (unused for CSV).

	    Returns:
	        For CSV: the first column of every non-empty row.
	        For YAML: the value stored under ``file_tag`` (``None`` if the key is absent).

	    The process exits via ``sys.exit()`` when the suffix is not supported.
	    """
	    file_suffix = Path(file_path).suffix

	    if file_suffix == suffix_list[0]:
	        # csv version: read as UTF-8 like the YAML path, close the handle,
	        # and skip blank rows, which would otherwise raise IndexError on row[0].
	        with open(file_path, encoding="utf-8", newline="") as csv_file:
	            file_names = [row[0] for row in csv.reader(csv_file) if row]
	    elif file_suffix == suffix_list[1]:
	        # yaml version
	        file_names = yaml_parse(file_path).get(file_tag)
	    else:
	        print(f"{file_path} is not in the correct format! Program exits!")
	        sys.exit()

	    return file_names


	# model loading
	def model_loading(model_name, device, opt=()):
	    """Load a YOLOv5 model through ``torch.hub.load`` from ``model_path``.

	    Args:
	        model_name: Model entry point name passed to ``torch.hub.load``.
	        device: Device string forwarded to the hub entry point.
	        opt: Collection of option strings; when it contains
	            ``"refresh_yolov5"`` the hub cache is bypassed (``force_reload``).

	    Returns:
	        The object returned by ``torch.hub.load``.

	    Raises:
	        Whatever ``torch.hub.load`` raises. The error is printed and then
	        re-raised; previously execution fell through to ``return model`` with
	        ``model`` unbound, which hid the real cause behind an UnboundLocalError.
	    """
	    try:
	        model = torch.hub.load(
	            model_path,
	            model_name,
	            force_reload="refresh_yolov5" in opt,
	            device=device,
	            _verbose=False,
	        )
	    except Exception as e:
	        print(e)
	        raise

	    print(f"🚀 welcome to {GYD_VERSION}，{model_name} loaded successfully!")
	    return model


	# detection results -> JSON-ready records
	def export_json(results, img_size):
	    """Convert detection results into nested lists of plain dictionaries.

	    Args:
	        results: Detection result object; ``results.xyxyn`` is iterated (one
	            entry per image) and ``results.t[1]`` feeds the FPS figure.
	        img_size: Sequence whose items 0 and 1 are stored as IMG_WIDTH / IMG_HEIGHT.

	    Returns:
	        One list per entry of ``results.xyxyn``; each inner list holds one
	        dictionary per detection row. Box coordinates are emitted exactly as
	        stored in ``results.xyxyn``, rounded to 6 decimals.
	    """
	    per_image = []
	    for detections in results.xyxyn:
	        records = []
	        for det_index, det in enumerate(detections):
	            cls_index = int(det[5])
	            xmin, ymin, xmax, ymax = det[:4].tolist()
	            records.append({
	                "ID": det_index,
	                "CLASS": cls_index,
	                "CLASS_NAME": model_cls_name_cp[cls_index],
	                "BOUNDING_BOX": {
	                    "XMIN": round(xmin, 6),
	                    "YMIN": round(ymin, 6),
	                    "XMAX": round(xmax, 6),
	                    "YMAX": round(ymax, 6),
	                },
	                "CONF": round(float(det[4]), 2),
	                # presumably results.t[1] is the inference time in ms - TODO confirm
	                "FPS": round(1000 / float(results.t[1]), 2),
	                "IMG_WIDTH": img_size[0],
	                "IMG_HEIGHT": img_size[1],
	            })
	        per_image.append(records)

	    return per_image


	# draw one detection (bounding box + optional label) on a PIL image
	def pil_draw(img, countdown_msg, textFont, xyxy, font_size, opt, obj_cls_index, color_list):
	    """Draw a bounding box and, when requested, its text label onto ``img``.

	    Args:
	        img: PIL image; it is drawn on in place.
	        countdown_msg: Label text.
	        textFont: PIL font used to measure and render the label.
	        xyxy: Box as ``[x0, y0, x1, y1]`` in pixel coordinates.
	        font_size: Unused; kept so existing callers keep working.
	        opt: Collection of option strings; the label is drawn only when it
	            contains ``"label"``.
	        obj_cls_index: Index into ``color_list`` selecting the colour.
	        color_list: Sequence of colours, one per class.

	    Returns:
	        The same ``img`` object that was passed in.
	    """
	    canvas = ImageDraw.Draw(img)
	    box_color = color_list[obj_cls_index]

	    canvas.rectangle(xyxy, fill=None, outline=box_color) # bounding box

	    if "label" in opt:
	        # FreeTypeFont.getsize() was removed in Pillow 10; getbbox() is its
	        # replacement. Fall back to getsize() for fonts that lack getbbox().
	        if hasattr(textFont, "getbbox"):
	            left, _top, right, bottom = textFont.getbbox(countdown_msg)
	            text_w, text_h = right - left, bottom
	        else:
	            text_w, text_h = textFont.getsize(countdown_msg)

	        label_origin = (xyxy[0], xyxy[1])

	        canvas.rectangle(
	            (label_origin[0], label_origin[1], label_origin[0] + text_w, label_origin[1] + text_h),
	            fill=box_color,
	            outline=box_color,
	        ) # label background

	        canvas.multiline_text(
	            label_origin,
	            countdown_msg,
	            fill=(255, 255, 255),
	            font=textFont,
	            align="center",
	        )

	    return img


	# Label and bounding box color settings
	def color_set(cls_num):
	    """Return ``cls_num`` random RGB colours.

	    Args:
	        cls_num: Number of colours to generate.

	    Returns:
	        List of ``(r, g, b)`` tuples whose components are plain Python ints
	        in ``0..255`` (drawn from NumPy's global random state), so the tuples
	        do not carry NumPy scalar types into later drawing calls.
	    """
	    # One draw of three channel values per class; .tolist() converts the
	    # NumPy integers into built-in ints.
	    return [tuple(np.random.choice(256, size=3).tolist()) for _ in range(cls_num)]


	# YOLOv5 image detection function
	def yolo_det_img(img, device, model_name, infer_size, conf, iou, max_num, model_cls, opt):
	    """Run detection on one PIL image and build every output of the image tab.

	    Reads the module globals ``model_cls_name_cp`` and ``cls_name`` (set by
	    ``main``) and updates ``model``, ``model_name_tmp`` and ``device_tmp``.

	    Args:
	        img: PIL image; boxes and labels are drawn onto it in place.
	        device: Device string forwarded to ``model_loading``.
	        model_name: Model name forwarded to ``model_loading``.
	        infer_size: Passed as ``size=`` to the model call.
	        conf: Assigned to ``model.conf`` (NMS confidence threshold).
	        iou: Assigned to ``model.iou`` (NMS IoU threshold).
	        max_num: Cast to int and assigned to ``model.max_det``.
	        model_cls: Assigned to ``model.classes``.
	        opt: Collection of option strings. This function checks
	            ``"refresh_yolov5"``, ``"csv"``, ``"excel"``, ``"json"`` and
	            ``"pdf"``; ``opt`` is also forwarded to ``model_loading`` and
	            ``pil_draw``.

	    Returns:
	        Tuple ``(det_img, img_crops, objSize_dict, clsRatio_dict, dataframe,
	        det_json, report, det_csv, det_excel)``. The last four are ``None``
	        when the matching option is not selected.
	    """
	    global model, model_name_tmp, device_tmp

	    # Reload only when the requested model or device changed, or a refresh
	    # was asked for. Every branch of the old if/elif/else reloaded, so the
	    # model was re-created on every call. The cache keys are updated only
	    # after a successful load so a failed load is retried next time.
	    if model_name_tmp != model_name or device_tmp != device or "refresh_yolov5" in opt:
	        print(f"Loading model {model_name}......")
	        model = model_loading(model_name, device, opt)
	        model_name_tmp = model_name
	        device_tmp = device

	    # -------------Model tuning -------------
	    model.conf = conf # NMS confidence threshold
	    model.iou = iou # NMS IoU threshold
	    model.max_det = int(max_num) # Maximum number of detection frames
	    model.classes = model_cls # model classes

	    color_list = color_set(len(model_cls_name_cp)) # one colour per class

	    img_size = img.size # frame size

	    results = model(img, size=infer_size) # detection

	    # ----------------Object crops----------------
	    # Reverse the last axis of each crop (channel order swap) before display.
	    img_crops = [crop["im"][..., ::-1] for crop in results.crop(save=False)]

	    # Data Frame
	    dataframe = results.pandas().xyxy[0].round(2)

	    det_csv = "./Det_Report.csv" if "csv" in opt else None
	    if det_csv is not None:
	        dataframe.to_csv(det_csv, index=False)

	    det_excel = "./Det_Report.xlsx" if "excel" in opt else None
	    if det_excel is not None:
	        dataframe.to_excel(det_excel, sheet_name='sheet1', index=False)

	    # ----------------Load fonts----------------
	    # The language code is the last two characters of the class-name file
	    # stem (e.g. "cls_name_en.yaml" -> "en"). Using the stem also works for
	    # .csv class files, where searching for ".yaml" raised ValueError.
	    cls_name_lang = Path(cls_name).stem[-2:]

	    if cls_name_lang == "zh":
	        font_file = "SimSun.ttf" # Chinese
	    elif cls_name_lang == "ko":
	        font_file = "malgun.ttf" # Korean
	    else:
	        # English, Russian, Spanish, Arabic; also the fallback for any other
	        # code, which previously left textFont unbound.
	        font_file = "TimesNewRoman.ttf"
	    textFont = ImageFont.truetype(f"{ROOT_PATH}/fonts/{font_file}", size=FONTSIZE)

	    # ----------------Draw detections----------------
	    cls_det_stat = [] # class name of every detection
	    area_obj_all = [] # pixel area of every detection
	    det_img = img # returned as-is when nothing is detected

	    for result in results.xyxyn:
	        for det_id, det in enumerate(result):
	            obj_cls_index = int(det[5]) # category index
	            obj_cls = model_cls_name_cp[obj_cls_index] # category
	            cls_det_stat.append(obj_cls)

	            # Coordinates are scaled by the image size to get pixel values.
	            nx0, ny0, nx1, ny1 = det[:4].tolist()
	            x0 = int(img_size[0] * nx0)
	            y0 = int(img_size[1] * ny0)
	            x1 = int(img_size[0] * nx1)
	            y1 = int(img_size[1] * ny1)

	            score = float(det[4]) # confidence

	            det_img = pil_draw(
	                img,
	                f"{det_id}-{obj_cls}:{score:.2f}",
	                textFont,
	                [x0, y0, x1, y1],
	                FONTSIZE,
	                opt,
	                obj_cls_index,
	                color_list,
	            )

	            area_obj_all.append((x1 - x0) * (y1 - y0))

	    # ------------JSON generate------------
	    det_json = export_json(results, img.size)[0] # Detection information
	    det_json_format = json.dumps(det_json, sort_keys=False, indent=4, separators=(",", ":"),
	                                 ensure_ascii=False) # JSON formatting

	    if "json" not in opt:
	        det_json = None

	    # -------PDF generate-------
	    report = "./Det_Report.pdf" if "pdf" in opt else None
	    if report is not None:
	        pdf_generate(det_json_format, report, GYD_VERSION)

	    # --------------object size compute--------------
	    # Buckets by pixel area: (0, 32^2], (32^2, 96^2], (96^2, inf).
	    # Zero-area boxes are not counted.
	    small_max = 32 ** 2
	    medium_max = 96 ** 2
	    s_obj = sum(1 for area in area_obj_all if 0 < area <= small_max)
	    m_obj = sum(1 for area in area_obj_all if small_max < area <= medium_max)
	    l_obj = sum(1 for area in area_obj_all if area > medium_max)

	    sml_obj_total = s_obj + m_obj + l_obj

	    if sml_obj_total:
	        objSize_dict = {
	            name: count / sml_obj_total for name, count in zip(obj_style, (s_obj, m_obj, l_obj))
	        }
	    else:
	        # No countable detection: avoid dividing by zero.
	        objSize_dict = {name: 0.0 for name in obj_style}

	    # ------------cls stat------------
	    clsDet_dict = Counter(cls_det_stat)
	    clsDet_dict_sum = sum(clsDet_dict.values())
	    clsRatio_dict = {name: count / clsDet_dict_sum for name, count in clsDet_dict.items()}

	    return det_img, img_crops, objSize_dict, clsRatio_dict, dataframe, det_json, report, det_csv, det_excel


	# YOLOv5 video detection function
	def yolo_det_video(video, device, model_name, infer_size, conf, iou, max_num, model_cls, opt):
	    """Run detection on every frame of a video and write the annotated result.

	    Reads the module globals ``model_cls_name_cp`` and ``cls_name`` (set by
	    ``main``) and updates ``model``, ``model_name_tmp`` and ``device_tmp``.

	    Args:
	        video: Source handed to ``cv2.VideoCapture``.
	        device: Device string forwarded to ``model_loading``.
	        model_name: Model name forwarded to ``model_loading``.
	        infer_size: Passed as ``size=`` to the model call.
	        conf: Assigned to ``model.conf`` (NMS confidence threshold).
	        iou: Assigned to ``model.iou`` (NMS IoU threshold).
	        max_num: Cast to int and assigned to ``model.max_det``.
	        model_cls: Assigned to ``model.classes``.
	        opt: Collection of option strings; forwarded to ``model_loading`` and
	            ``pil_draw``, and checked here for ``"refresh_yolov5"``.

	    Returns:
	        Path of the written video, ``"./output.avi"``.
	    """
	    global model, model_name_tmp, device_tmp

	    # Remove a stale ./output.mp4 without spawning a shell.
	    stale_output = "./output.mp4"
	    if os.path.isfile(stale_output):
	        os.remove(stale_output)

	    # Reload only when the requested model or device changed, or a refresh
	    # was asked for. Every branch of the old if/elif/else reloaded, so the
	    # model was re-created on every call. The cache keys are updated only
	    # after a successful load so a failed load is retried next time.
	    if model_name_tmp != model_name or device_tmp != device or "refresh_yolov5" in opt:
	        print(f"Loading model {model_name}......")
	        model = model_loading(model_name, device, opt)
	        model_name_tmp = model_name
	        device_tmp = device

	    # -------------Model tuning -------------
	    model.conf = conf # NMS confidence threshold
	    model.iou = iou # NMS IOU threshold
	    model.max_det = int(max_num) # Maximum number of detection frames
	    model.classes = model_cls # model classes

	    color_list = color_set(len(model_cls_name_cp)) # one colour per class

	    # ----------------Load fonts----------------
	    # The language code is the last two characters of the class-name file
	    # stem (e.g. "cls_name_en.yaml" -> "en"). Using the stem also works for
	    # .csv class files, where searching for ".yaml" raised ValueError.
	    cls_name_lang = Path(cls_name).stem[-2:]

	    if cls_name_lang == "zh":
	        font_file = "SimSun.ttf" # Chinese
	    elif cls_name_lang == "ko":
	        font_file = "malgun.ttf" # Korean
	    else:
	        # English, Russian, Spanish, Arabic; also the fallback for any other
	        # code, which previously left textFont unbound.
	        font_file = "TimesNewRoman.ttf"
	    textFont = ImageFont.truetype(f"{ROOT_PATH}/fonts/{font_file}", size=FONTSIZE)

	    # video->frame
	    gc.collect()
	    output_video_path = "./output.avi"
	    cap = cv2.VideoCapture(video)
	    fourcc = cv2.VideoWriter_fourcc(*"I420") # encoder

	    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
	    out = cv2.VideoWriter(output_video_path, fourcc, 30.0, frame_size) # fixed 30 fps, as before

	    try:
	        while cap.isOpened():
	            ret, frame = cap.read()
	            # Stop at the first frame that cannot be read
	            if not ret:
	                break

	            # OpenCV delivers BGR frames. The YOLOv5 hub model expects RGB
	            # for array input, so convert once and reuse the RGB frame for
	            # both inference and drawing.
	            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	            results = model(rgb_frame, size=infer_size) # detection

	            h, w = frame.shape[:2]
	            img_size = (w, h) # frame size

	            # One PIL conversion per frame instead of one per detection.
	            pil_frame = Image.fromarray(rgb_frame)

	            for result in results.xyxyn:
	                for det_id, det in enumerate(result):
	                    obj_cls_index = int(det[5]) # category index
	                    obj_cls = model_cls_name_cp[obj_cls_index] # category

	                    # Coordinates are scaled by the frame size to get pixel values.
	                    nx0, ny0, nx1, ny1 = det[:4].tolist()
	                    x0 = int(img_size[0] * nx0)
	                    y0 = int(img_size[1] * ny0)
	                    x1 = int(img_size[0] * nx1)
	                    y1 = int(img_size[1] * ny1)

	                    score = float(det[4]) # confidence

	                    pil_frame = pil_draw(
	                        pil_frame,
	                        f"{det_id}-{obj_cls}:{score:.2f}",
	                        textFont,
	                        [x0, y0, x1, y1],
	                        FONTSIZE,
	                        opt,
	                        obj_cls_index,
	                        color_list,
	                    )

	            # frame->video (back to BGR for the OpenCV writer)
	            out.write(cv2.cvtColor(np.asarray(pil_frame), cv2.COLOR_RGB2BGR))
	    finally:
	        # Release the writer and the capture even if inference or drawing fails.
	        out.release()
	        cap.release()

	    return output_video_path


	def main(args):
	    """Build the two-tab Gradio interface (image / video) and launch it.

	    Sets the module globals ``model``, ``model_cls_name_cp`` and ``cls_name``
	    that the detection functions read.

	    Args:
	        args: Namespace produced by ``parse_args``.

	    Raises:
	        ValueError: Login is enabled but ``args.usr_pwd`` does not hold
	            exactly a user name and a password.
	    """
	    gr.close_all()

	    global model, model_cls_name_cp, cls_name

	    source = args.source
	    source_video = args.source_video
	    img_tool = args.img_tool
	    nms_conf = args.nms_conf
	    nms_iou = args.nms_iou
	    model_name = args.model_name
	    model_cfg = args.model_cfg
	    cls_name = args.cls_name
	    device = args.device
	    inference_size = args.inference_size
	    max_detnum = args.max_detnum
	    slider_step = args.slider_step
	    is_login = args.is_login
	    usr_pwd = args.usr_pwd
	    is_share = args.is_share

	    is_fonts(f"{ROOT_PATH}/fonts") # Check font files

	    # model loading
	    model = model_loading(model_name, device)

	    model_names = yaml_csv(model_cfg, "model_names") # model names
	    model_cls_name = yaml_csv(cls_name, "model_cls_name") # class name

	    model_cls_name_cp = model_cls_name.copy() # copy read by the detection functions

	    def _shared_controls(opt_choices, opt_default):
	        # Build the eight controls both tabs share, in the positional order
	        # the detection functions expect after the image/video argument.
	        return [
	            gr.Radio(choices=["cuda:0", "cpu"], value=device, label="device"),
	            gr.Dropdown(choices=model_names, value=model_name, type="value", label="model"),
	            gr.Radio(choices=[320, 640, 1280], value=inference_size, label="inference size"),
	            gr.Slider(0, 1, step=slider_step, value=nms_conf, label="confidence threshold"),
	            gr.Slider(0, 1, step=slider_step, value=nms_iou, label="IoU threshold"),
	            gr.Number(value=max_detnum, label="Maximum number of detections"),
	            gr.CheckboxGroup(choices=model_cls_name, value=model_cls_name, type="index", label="category"),
	            gr.CheckboxGroup(choices=opt_choices, value=opt_default, type="value", label="operate"),
	        ]

	    # ------------------- Input components: image tab -------------------
	    inputs_img = gr.Image(image_mode="RGB", source=source, tool=img_tool, type="pil", label="original image")
	    inputs_img_list = [inputs_img] + _shared_controls(
	        ["refresh_yolov5", "label", "pdf", "json", "csv", "excel"],
	        ["label", "pdf"],
	    )

	    # ------------------- Input components: video tab -------------------
	    inputs_video = gr.Video(format="mp4", source=source_video, label="original video") # webcam
	    inputs_video_list = [inputs_video] + _shared_controls(["refresh_yolov5", "label"], ["label"])

	    # ------------------- Output components: image tab -------------------
	    outputs_img = gr.Image(type="pil", label="Detection image")
	    outputs_crops = gr.Gallery(label="Object crop")
	    outputs_df = gr.Dataframe(max_rows=5,
	                              overflow_row_behaviour="paginate",
	                              type="pandas",
	                              label="List of detection information")
	    outputs_objSize = gr.Label(label="Object size ratio statistics")
	    outputs_clsSize = gr.Label(label="Category detection proportion statistics")
	    outputs_json = gr.JSON(label="Detection information")
	    outputs_pdf = gr.File(label="pdf detection report")
	    outputs_csv = gr.File(label="csv detection report")
	    outputs_excel = gr.File(label="xlsx detection report")

	    # ------------------- Output components: video tab -------------------
	    outputs_video = gr.Video(format='mp4', label="Detection video")

	    # Same order as the tuple returned by yolo_det_img
	    outputs_img_list = [
	        outputs_img, outputs_crops, outputs_objSize, outputs_clsSize, outputs_df, outputs_json, outputs_pdf,
	        outputs_csv, outputs_excel]
	    outputs_video_list = [outputs_video]

	    # title
	    title = "Gradio YOLOv5 Det v0.4"

	    # describe
	    description = "Author: 曾逸夫（Zeng Yifu）, Project Address: https://gitee.com/CV_Lab/gradio_yolov5_det, Github: https://github.com/Zengyf-CVer, thanks to [Gradio](https://github.com/gradio-app/gradio) & [YOLOv5](https://github.com/ultralytics/yolov5)"

	    # example rows for the image tab, in inputs_img_list order
	    examples = [
	        [
	            "./img_example/bus.jpg",
	            "cpu",
	            "yolov5s",
	            640,
	            0.6,
	            0.5,
	            10,
	            ["person", "bus"],
	            ["label", "pdf"],
	        ],
	        [
	            "./img_example/giraffe.jpg",
	            "cpu",
	            "yolov5l",
	            320,
	            0.5,
	            0.45,
	            12,
	            ["giraffe"],
	            ["label", "pdf"],
	        ],
	        [
	            "./img_example/zidane.jpg",
	            "cpu",
	            "yolov5m",
	            640,
	            0.6,
	            0.5,
	            15,
	            ["person", "tie"],
	            ["pdf", "json"],
	        ],
	        [
	            "./img_example/Millenial-at-work.jpg",
	            "cpu",
	            "yolov5s6",
	            1280,
	            0.5,
	            0.5,
	            20,
	            ["person", "chair", "cup", "laptop"],
	            ["label", "pdf"],
	        ],
	    ]

	    # interface
	    gyd_img = gr.Interface(
	        fn=yolo_det_img,
	        inputs=inputs_img_list,
	        outputs=outputs_img_list,
	        title=title,
	        description=description,
	        examples=examples,
	        flagging_dir="run", # output directory
	    )

	    gyd_video = gr.Interface(
	        fn=yolo_det_video,
	        inputs=inputs_video_list,
	        outputs=outputs_video_list,
	        title=title,
	        description=description,
	        flagging_dir="run", # output directory
	        allow_flagging="never",
	    )

	    gyd = gr.TabbedInterface(interface_list=[gyd_img, gyd_video], tab_names=["Image Mode", "Video Mode"])

	    # The two launch calls differed only in `auth`, so build the keyword
	    # arguments once and add `auth` when login is enabled.
	    launch_kwargs = {
	        "inbrowser": True, # Automatically open default browser
	        "show_tips": True, # Automatically display the latest features of gradio
	        "share": is_share, # Project sharing, other devices can access
	        "favicon_path": "./icon/logo.ico", # web icon
	        "show_error": True, # Display error message in browser console
	        "quiet": True, # Suppress most print statements
	    }

	    if is_login:
	        # --usr_pwd uses nargs='+', so any count can arrive; fail early with
	        # a clear message instead of inside the login handling.
	        if len(usr_pwd) != 2:
	            raise ValueError("--usr_pwd expects exactly two values: <user> <password>")
	        launch_kwargs["auth"] = tuple(usr_pwd) # login interface

	    gyd.launch(**launch_kwargs)


	if __name__ == "__main__":
	    # Script entry point: parse the command-line options and start the app.
	    main(parse_args())