Browse files
@@ -224,207 +224,53 @@ async def process_single_dog(image):
224 |
225 |
return explanation, image, buttons[0], buttons[1], buttons[2], gr.update(visible=True), initial_state
226 |
227 |
async def predict(image):
    """Detect the dogs in ``image`` and describe the breed of each one.

    The image is handed to ``detect_multiple_dogs``. When that reports at
    most one detection, the whole image is delegated to
    ``process_single_dog``. Otherwise every detected crop is classified with
    ``predict_single_dog``, its box is drawn on a copy of the image, and a
    per-dog explanation is chosen from the top-1 probability:

    * ``>= 0.5`` -- the formatted description of the top breed;
    * ``>= 0.2`` -- the three most likely breeds, plus one "More about"
      button update per candidate;
    * otherwise -- a note that the image is unclear or the breed is unknown.

    Args:
        image: The uploaded picture (a NumPy array is converted with
            ``Image.fromarray``), or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple: explanation text, annotated image (``None`` on the
        "no image" and error paths), three button updates, the back-button
        update, and the state dict (``None`` on those same two paths).
    """
    if image is None:
        return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None

    try:
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        dogs = await detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4)

        # Zero or one detection: classify the full image instead of a crop.
        if len(dogs) <= 1:
            return await process_single_dog(image)

        color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
        explanations = []
        buttons = []
        annotated_image = image.copy()
        draw = ImageDraw.Draw(annotated_image)
        font = ImageFont.load_default()

        for i, (cropped_image, _, box) in enumerate(dogs):
            top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)

            # Colours repeat once there are more dogs than palette entries.
            color = color_list[i % len(color_list)]
            draw.rectangle(box, outline=color, width=3)
            draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)

            breed = topk_breeds[0]
            if top1_prob >= 0.5:
                description = get_dog_description(breed)
                formatted_description = format_description(description, breed)
                explanations.append(f"Dog {i+1}: {formatted_description}")
            elif top1_prob >= 0.2:
                dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
                dog_explanation += "\n".join(
                    f"{j+1}. **{name}** ({prob} confidence)"
                    for j, (name, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))
                )
                explanations.append(dog_explanation)
                buttons.extend(
                    gr.update(visible=True, value=f"Dog {i+1}: More about {name}")
                    for name in topk_breeds[:3]
                )
            else:
                explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")

        final_explanation = "\n\n".join(explanations)

        if buttons:
            final_explanation += "\n\nClick on a button to view more information about the breed."
            initial_state = {
                "explanation": final_explanation,
                "buttons": buttons,
                "show_back": True,
                "image": annotated_image,
                "is_multi_dog": True,
                "dogs_info": explanations
            }
            # NOTE(review): only the first three button updates are returned,
            # even when several uncertain dogs each contributed three.
            return (final_explanation, annotated_image,
                    buttons[0] if len(buttons) > 0 else gr.update(visible=False),
                    buttons[1] if len(buttons) > 1 else gr.update(visible=False),
                    buttons[2] if len(buttons) > 2 else gr.update(visible=False),
                    gr.update(visible=True),
                    initial_state)
        else:
            initial_state = {
                "explanation": final_explanation,
                "buttons": [],
                "show_back": False,
                "image": annotated_image,
                "is_multi_dog": True,
                "dogs_info": explanations
            }
            return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
    except Exception as e:
        # Handler boundary: report the failure in the UI instead of raising.
        error_msg = f"An error occurred: {str(e)}"
        print(error_msg)  # log the error
        return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
295 |
296 |
297 |
298 |
# async def detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4, merge_threshold=0.5):
299 |
# results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
300 |
# dogs = []
301 |
302 |
# image_area = image.width * image.height
303 |
# min_area_ratio = 0.005 # 最小檢測面積佔整個圖像的比例
304 |
305 |
# for box in results.boxes:
306 |
# if box.cls == 16: # COCO 數據集中狗的類別是 16
307 |
# xyxy = box.xyxy[0].tolist()
308 |
# area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
309 |
# if area / image_area >= min_area_ratio:
310 |
# confidence = box.conf.item()
311 |
# dogs.append((xyxy, confidence))
312 |
313 |
# if dogs:
314 |
# boxes = torch.tensor([dog[0] for dog in dogs])
315 |
# scores = torch.tensor([dog[1] for dog in dogs])
316 |
317 |
# # 應用 NMS
318 |
# keep = nms(boxes, scores, iou_threshold)
319 |
320 |
# merged_dogs = []
321 |
# for i in keep:
322 |
# xyxy = boxes[i].tolist()
323 |
# confidence = scores[i].item()
324 |
# merged_dogs.append((xyxy, confidence))
325 |
326 |
# # 後處理:分離過於接近的檢測框
327 |
# final_dogs = []
328 |
# while merged_dogs:
329 |
# base_dog = merged_dogs.pop(0)
330 |
# to_merge = [base_dog]
331 |
332 |
# i = 0
333 |
# while i < len(merged_dogs):
334 |
# iou = box_iou(torch.tensor([base_dog[0]]), torch.tensor([merged_dogs[i][0]]))[0][0].item()
335 |
# if iou > merge_threshold:
336 |
# to_merge.append(merged_dogs.pop(i))
337 |
# else:
338 |
# i += 1
339 |
340 |
# if len(to_merge) == 1:
341 |
# final_dogs.append(base_dog)
342 |
# else:
343 |
# # 如果檢測到多個重疊框,嘗試分離它們
344 |
# centers = torch.tensor([[((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)] for box, _ in to_merge])
345 |
# distances = torch.cdist(centers, centers)
346 |
347 |
# if torch.any(distances > 0): # 確保不是完全重疊
348 |
# max_distance = distances.max()
349 |
# if max_distance > (base_dog[0][2] - base_dog[0][0]) * 0.5: # 如果最大距離大於框寬度的一半
350 |
# final_dogs.extend(to_merge)
351 |
# else:
352 |
# # 合併為一個框
353 |
# merged_box = torch.tensor([box for box, _ in to_merge]).mean(dim=0)
354 |
# merged_confidence = max(conf for _, conf in to_merge)
355 |
# final_dogs.append((merged_box.tolist(), merged_confidence))
356 |
# else:
357 |
# # 完全重疊的情況,保留置信度最高的
358 |
# best_dog = max(to_merge, key=lambda x: x[1])
359 |
# final_dogs.append(best_dog)
360 |
361 |
# # 擴展邊界框並創建剪裁的圖像
362 |
# expanded_dogs = []
363 |
# for xyxy, confidence in final_dogs:
364 |
# expanded_xyxy = [
365 |
# max(0, xyxy[0] - 20),
366 |
# max(0, xyxy[1] - 20),
367 |
# min(image.width, xyxy[2] + 20),
368 |
# min(image.height, xyxy[3] + 20)
369 |
# ]
370 |
# cropped_image = image.crop(expanded_xyxy)
371 |
# expanded_dogs.append((cropped_image, confidence, expanded_xyxy))
372 |
373 |
# return expanded_dogs
374 |
375 |
# # 如果沒有檢測到狗狗,返回整張圖片
376 |
# return [(image, 1.0, [0, 0, image.width, image.height])]
377 |
378 |
# async def predict(image):
379 |
# if image is None:
380 |
# return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
381 |
382 |
# try:
383 |
# if isinstance(image, np.ndarray):
384 |
# image = Image.fromarray(image)
385 |
386 |
# dogs = await detect_multiple_dogs(image)
387 |
388 |
# # 如果沒有檢測到狗狗或只檢測到一隻,使用整張圖像進行分類
389 |
# if len(dogs) <= 1:
390 |
391 |
392 |
# return await process_single_dog(image)
393 |
# else:
394 |
# dogs = [(image, 1.0, [0, 0, image.width, image.height])]
395 |
396 |
# # 多狗情境處理保持不變
397 |
# color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
398 |
# explanations = []
399 |
# buttons = []
400 |
# annotated_image = image.copy()
401 |
# draw = ImageDraw.Draw(annotated_image)
402 |
# font = ImageFont.load_default()
403 |
404 |
# for i, (cropped_image, _, box) in enumerate(dogs):
405 |
# top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
406 |
# color = color_list[i % len(color_list)]
407 |
# draw.rectangle(box, outline=color, width=3)
408 |
# draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
409 |
410 |
# breed = topk_breeds[0]
411 |
# if top1_prob >= 0.5:
412 |
# description = get_dog_description(breed)
413 |
# formatted_description = format_description(description, breed)
414 |
# explanations.append(f"Dog {i+1}: {formatted_description}")
415 |
416 |
# dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
417 |
# dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
418 |
# explanations.append(dog_explanation)
419 |
# buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
420 |
421 |
# final_explanation = "\n\n".join(explanations)
422 |
# if buttons:
423 |
# final_explanation += "\n\nClick on a button to view more information about the breed."
424 |
# initial_state = {
425 |
# "explanation": final_explanation,
426 |
# "buttons": buttons,
427 |
# "show_back": True
428 |
# }
429 |
# return (final_explanation, annotated_image,
430 |
# buttons[0] if len(buttons) > 0 else gr.update(visible=False),
@@ -436,27 +282,172 @@ async def predict(image):
436 |
# initial_state = {
437 |
# "explanation": final_explanation,
438 |
# "buttons": [],
439 |
# "show_back": False
440 |
# }
441 |
# return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
442 |
443 |
# except Exception as e:
444 |
# error_msg = f"An error occurred: {str(e)}"
445 |
# print(error_msg) # 添加日誌輸出
446 |
# return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
447 |
448 |
449 |
450 |
def show_details(choice, previous_output, initial_state):
451 |
if not choice:
452 |
return previous_output, gr.update(visible=True), initial_state
453 |
454 |
455 |
breed = choice.split("More about ")
456 |
description = get_dog_description(breed)
457 |
formatted_description = format_description(description, breed)
458 |
459 |
460 |
initial_state["current_description"] = formatted_description
461 |
initial_state["original_buttons"] = initial_state.get("buttons", [])
462 |
@@ -471,10 +462,8 @@ def go_back(state):
471 |
return (
472 |
473 |
474 |
475 |
476 |
buttons[2] if len(buttons) > 2 else gr.update(visible=False),
477 |
gr.update(visible=False), # 隱藏 back 按鈕
478 |
479 |
480 |
@@ -488,10 +477,9 @@ with gr.Blocks() as iface:
488 |
489 |
output = gr.Markdown(label="Prediction Results")
490 |
491 |
492 |
493 |
494 |
btn3 = gr.Button("View More 3", visible=False)
495 |
496 |
back_button = gr.Button("Back", visible=False)
497 |
@@ -500,20 +488,25 @@ with gr.Blocks() as iface:
500 |
501 |
502 |
503 |
outputs=[output, output_image,
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
outputs=[output, output_image,
517 |
518 |
519 |
@@ -524,4 +517,6 @@ with gr.Blocks() as iface:
524 |
gr.HTML('For more details on this project and other work, feel free to visit my GitHub <a href="">Dog Breed Classifier</a>')
525 |
526 |
if __name__ == "__main__":
527 |
224 |
225 |
return explanation, image, buttons[0], buttons[1], buttons[2], gr.update(visible=True), initial_state
226 |
227 |
# async def predict(image):
228 |
# if image is None:
229 |
# return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
230 |
# try:
231 |
# if isinstance(image, np.ndarray):
232 |
# image = Image.fromarray(image)
233 |
# dogs = await detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4)
234 |
235 |
# if len(dogs) <= 1:
236 |
# return await process_single_dog(image)
237 |
238 |
# color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
239 |
# explanations = []
240 |
# buttons = []
241 |
# annotated_image = image.copy()
242 |
# draw = ImageDraw.Draw(annotated_image)
243 |
# font = ImageFont.load_default()
244 |
245 |
# for i, (cropped_image, _, box) in enumerate(dogs):
246 |
# top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
247 |
# color = color_list[i % len(color_list)]
248 |
# draw.rectangle(box, outline=color, width=3)
249 |
# draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
250 |
251 |
# breed = topk_breeds[0]
252 |
# if top1_prob >= 0.5:
253 |
# description = get_dog_description(breed)
254 |
# formatted_description = format_description(description, breed)
255 |
# explanations.append(f"Dog {i+1}: {formatted_description}")
256 |
# elif top1_prob >= 0.2:
257 |
# dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
258 |
# dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
259 |
# explanations.append(dog_explanation)
260 |
# buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
261 |
# else:
262 |
# explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
263 |
264 |
# final_explanation = "\n\n".join(explanations)
265 |
# if buttons:
266 |
# final_explanation += "\n\nClick on a button to view more information about the breed."
267 |
# initial_state = {
268 |
# "explanation": final_explanation,
269 |
# "buttons": buttons,
270 |
# "show_back": True,
271 |
# "image": annotated_image,
272 |
# "is_multi_dog": True,
273 |
# "dogs_info": explanations
274 |
# }
275 |
# return (final_explanation, annotated_image,
276 |
# buttons[0] if len(buttons) > 0 else gr.update(visible=False),
282 |
# initial_state = {
283 |
# "explanation": final_explanation,
284 |
# "buttons": [],
285 |
# "show_back": False,
286 |
# "image": annotated_image,
287 |
# "is_multi_dog": True,
288 |
# "dogs_info": explanations
289 |
# }
290 |
# return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
291 |
# except Exception as e:
292 |
# error_msg = f"An error occurred: {str(e)}"
293 |
# print(error_msg) # 添加日誌輸出
294 |
# return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
295 |
296 |
297 |
# def show_details(choice, previous_output, initial_state):
298 |
# if not choice:
299 |
# return previous_output, gr.update(visible=True), initial_state
300 |
301 |
# try:
302 |
# breed = choice.split("More about ")[-1]
303 |
# description = get_dog_description(breed)
304 |
# formatted_description = format_description(description, breed)
305 |
306 |
# # 保存當前描述和原始按鈕狀態
307 |
# initial_state["current_description"] = formatted_description
308 |
# initial_state["original_buttons"] = initial_state.get("buttons", [])
309 |
310 |
# return formatted_description, gr.update(visible=True), initial_state
311 |
# except Exception as e:
312 |
# error_msg = f"An error occurred while showing details: {e}"
313 |
# print(error_msg)
314 |
# return error_msg, gr.update(visible=True), initial_state
315 |
316 |
# def go_back(state):
317 |
# buttons = state.get("buttons", [])
318 |
# return (
319 |
# state["explanation"],
320 |
# state["image"],
321 |
# buttons[0] if len(buttons) > 0 else gr.update(visible=False),
322 |
# buttons[1] if len(buttons) > 1 else gr.update(visible=False),
323 |
# buttons[2] if len(buttons) > 2 else gr.update(visible=False),
324 |
# gr.update(visible=False), # 隱藏 back 按鈕
325 |
# state
326 |
# )
327 |
328 |
# with gr.Blocks() as iface:
329 |
# gr.HTML("<h1 style='text-align: center;'>🐶 Dog Breed Classifier 🔍</h1>")
330 |
# gr.HTML("<p style='text-align: center;'>Upload a picture of a dog, and the model will predict its breed, provide detailed information, and include an extra information link!</p>")
331 |
332 |
# with gr.Row():
333 |
# input_image = gr.Image(label="Upload a dog image", type="pil")
334 |
# output_image = gr.Image(label="Annotated Image")
335 |
336 |
# output = gr.Markdown(label="Prediction Results")
337 |
338 |
# with gr.Row():
339 |
# btn1 = gr.Button("View More 1", visible=False)
340 |
# btn2 = gr.Button("View More 2", visible=False)
341 |
# btn3 = gr.Button("View More 3", visible=False)
342 |
343 |
# back_button = gr.Button("Back", visible=False)
344 |
345 |
# initial_state = gr.State()
346 |
347 |
# input_image.change(
348 |
# predict,
349 |
# inputs=input_image,
350 |
# outputs=[output, output_image, btn1, btn2, btn3, back_button, initial_state]
351 |
# )
352 |
353 |
# for btn in [btn1, btn2, btn3]:
354 |
355 |
# show_details,
356 |
# inputs=[btn, output, initial_state],
357 |
# outputs=[output, back_button, initial_state]
358 |
# )
359 |
360 |
361 |
# go_back,
362 |
# inputs=[initial_state],
363 |
# outputs=[output, output_image, btn1, btn2, btn3, back_button, initial_state]
364 |
# )
365 |
366 |
# gr.Examples(
367 |
# examples=['Border_Collie.jpg', 'Golden_Retriever.jpeg', 'Saint_Bernard.jpeg', 'French_Bulldog.jpeg', 'Samoyed.jpg'],
368 |
# inputs=input_image
369 |
# )
370 |
371 |
# gr.HTML('For more details on this project and other work, feel free to visit my GitHub <a href="">Dog Breed Classifier</a>')
372 |
373 |
# if __name__ == "__main__":
374 |
# iface.launch()
375 |
376 |
377 |
async def predict(image):
    """Detect the dogs in ``image`` and describe the breed of each one.

    Every detection returned by ``detect_multiple_dogs`` is classified with
    ``predict_single_dog`` and outlined on a copy of the image. The per-dog
    explanation is chosen from the top-1 probability:

    * ``>= 0.5`` -- the formatted description of the top breed;
    * ``>= 0.2`` -- the three most likely breeds, plus one "More about"
      button per candidate;
    * otherwise -- a note that the image is unclear or the breed is unknown.

    Args:
        image: The uploaded picture (a NumPy array is converted with
            ``Image.fromarray``), or ``None`` when nothing was uploaded.

    Returns:
        A 5-tuple: explanation text, annotated image (``None`` on the
        "no image" and error paths), the list of breed buttons, the
        back-button update, and the state dict (``None`` on those same two
        paths).
    """
    if image is None:
        return "Please upload an image to start.", None, [], gr.update(visible=False), None

    try:
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        dogs = await detect_multiple_dogs(image)

        color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
        explanations = []
        buttons = []
        annotated_image = image.copy()
        draw = ImageDraw.Draw(annotated_image)
        font = ImageFont.load_default()

        for i, (cropped_image, _, box) in enumerate(dogs):
            top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)

            # Colours repeat once there are more dogs than palette entries.
            color = color_list[i % len(color_list)]
            draw.rectangle(box, outline=color, width=3)
            draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)

            if top1_prob >= 0.5:
                breed = topk_breeds[0]
                description = get_dog_description(breed)
                formatted_description = format_description(description, breed)
                explanations.append(f"Dog {i+1}: {formatted_description}")
            elif top1_prob >= 0.2:
                dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
                dog_explanation += "\n".join(
                    f"{j+1}. **{name}** ({prob} confidence)"
                    for j, (name, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))
                )
                explanations.append(dog_explanation)
                # NOTE(review): components are created inside the handler and
                # returned for the `button_group` output -- confirm that the
                # installed Gradio version accepts this.
                buttons.extend(
                    gr.Button(f"Dog {i+1}: More about {name}", visible=True)
                    for name in topk_breeds[:3]
                )
            else:
                explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")

        final_explanation = "\n\n".join(explanations)

        if buttons:
            final_explanation += "\n\nClick on a button to view more information about the breed."
            initial_state = {
                "explanation": final_explanation,
                "buttons": buttons,
                "show_back": True,
                "image": annotated_image,
                "is_multi_dog": len(dogs) > 1,
                "dogs_info": explanations
            }
            return final_explanation, annotated_image, buttons, gr.update(visible=True), initial_state
        else:
            initial_state = {
                "explanation": final_explanation,
                "buttons": [],
                "show_back": False,
                "image": annotated_image,
                "is_multi_dog": len(dogs) > 1,
                "dogs_info": explanations
            }
            return final_explanation, annotated_image, [], gr.update(visible=False), initial_state

    except Exception as e:
        # Handler boundary: report the failure in the UI instead of raising.
        error_msg = f"An error occurred: {str(e)}"
        print(error_msg)  # Add log output
        return error_msg, None, [], gr.update(visible=False), None
440 |
441 |
def show_details(choice, previous_output, initial_state):
442 |
if not choice:
443 |
return previous_output, gr.update(visible=True), initial_state
444 |
445 |
446 |
dog_num, breed = choice.split(": More about ")
447 |
description = get_dog_description(breed)
448 |
formatted_description = format_description(description, breed)
449 |
450 |
# Save current description and original button state
451 |
initial_state["current_description"] = formatted_description
452 |
initial_state["original_buttons"] = initial_state.get("buttons", [])
453 |
462 |
return (
463 |
464 |
465 |
466 |
gr.update(visible=False), # Hide back button
467 |
468 |
469 |
477 |
478 |
output = gr.Markdown(label="Prediction Results")
479 |
480 |
button_group = gr.Group()
481 |
with button_group:
482 |
buttons = []
483 |
484 |
back_button = gr.Button("Back", visible=False)
485 |
488 |
489 |
490 |
491 |
outputs=[output, output_image, button_group, back_button, initial_state]
492 |
493 |
494 |
def update_buttons(buttons):
495 |
496 |
for btn in buttons:
497 |
button = gr.Button(btn.value)
498 |
499 |
500 |
inputs=[button, output, initial_state],
501 |
outputs=[output, back_button, initial_state]
502 |
503 |
504 |
return button_group
505 |
506 |
507 |
508 |
509 |
outputs=[output, output_image, button_group, back_button, initial_state]
510 |
511 |
512 |
517 |
gr.HTML('For more details on this project and other work, feel free to visit my GitHub <a href="">Dog Breed Classifier</a>')
518 |
519 |
if __name__ == "__main__":
520 |
521 |
522 |