import gradio as gr
import requests
import torch
import numpy as np
import torchvision.transforms as T
from PIL import Image
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation

# Load Mask2Former fine-tuned on Cityscapes semantic segmentation once at
# import time, so the checkpoint is not reloaded on every request.
processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")
model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")
model.eval()
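
# (Optional sketch) inference is much faster on a GPU when one is available;
# a common pattern (not enabled here) would be:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)
# the `inputs` tensors below would then also need .to(device).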


def greet(image):
    # `image` arrives as a PIL.Image (see type="pil" on the Gradio input below).
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # The model predicts class_queries_logits of shape
    # (batch_size, num_queries, num_labels + 1) and masks_queries_logits of
    # shape (batch_size, num_queries, height, width); the processor combines
    # them during post-processing, so there is no need to read them out here.
    predicted_semantic_map = processor.post_process_semantic_segmentation(
        outputs, target_sizes=[image.size[::-1]]
    )[0]
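    # (Optional sketch) the integer ids in predicted_semantic_map index the
    # Cityscapes label set; model.config.id2label maps id -> name, so e.g.
    #   present = {model.config.id2label[i.item()]
    #              for i in torch.unique(predicted_semantic_map)}
    # would list the classes detected in the image.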
    # See the demo notebooks linked from the "Resources" section of the
    # Mask2Former docs for richer visualizations.
    # Min-max scale the class-id map into 0-255 and cast to unsigned 8-bit so
    # it can be rendered as a grayscale image; clamp the denominator to 1 so a
    # single-class map does not divide by zero.
    denom = (predicted_semantic_map.max() - predicted_semantic_map.min()).clamp(min=1)
    predicted_semantic_map_scaled = (predicted_semantic_map - predicted_semantic_map.min()) / denom * 255
    predicted_semantic_map_uint8 = predicted_semantic_map_scaled.to(torch.uint8)
    # Convert the 2D uint8 tensor to a PIL image (mode "L").
    return T.ToPILImage()(predicted_semantic_map_uint8)
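

# (Optional sketch, not wired into the app) grayscale scaling makes nearby
# class ids hard to tell apart; a PIL palette gives each id a distinct color.
# The random palette below is an arbitrary illustrative choice, not the
# official Cityscapes colormap.
def colorize_map(semantic_map):
    ids = semantic_map.cpu().numpy().astype(np.uint8)  # Cityscapes ids fit in uint8
    pil_map = Image.fromarray(ids, mode="P")
    rng = np.random.default_rng(0)  # seeded so colors are stable across runs
    pil_map.putpalette(rng.integers(0, 256, size=768, dtype=np.uint8).tobytes())
    return pil_map.convert("RGB")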
url = "http://www.apparelnews.co.kr/upfiles/manage/202302/5d5f694177b26fc86e5db623bf7ae4b7.jpg"
# Quick local smoke test (bypasses the Gradio UI); uncomment to run:
# greet(Image.open(requests.get(url, stream=True).raw))
iface = gr.Interface(
    fn=greet,
    inputs=gr.Image(value=url, type="pil"),
    outputs="image",
    live=True,
)
iface.launch(debug=True)
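
# Note: on Hugging Face Spaces the app is served automatically; when running
# locally, iface.launch(share=True) would also expose a temporary public URL.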