import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation

# Load the processor and model once at import time so they are not
# re-downloaded on every request.
processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")
model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")


def greet(input_img):
    # gr.Image hands the function a numpy array; convert it to PIL for the processor.
    image = Image.fromarray(input_img)
    inputs = processor(images=image, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)

    # model predicts class_queries_logits of shape `(batch_size, num_queries)`
    # and masks_queries_logits of shape `(batch_size, num_queries, height, width)`
    class_queries_logits = outputs.class_queries_logits
    masks_queries_logits = outputs.masks_queries_logits

    # you can pass them to processor for postprocessing;
    # target_sizes expects (height, width) pairs, while PIL's .size is (width, height)
    predicted_semantic_map = processor.post_process_semantic_segmentation(
        outputs, target_sizes=[image.size[::-1]]
    )[0]

    # The semantic map is a 2-D tensor of class IDs. Normalize it to [0, 1]
    # and stack it into three channels so the 3x3 sepia filter can be applied.
    semantic_map = predicted_semantic_map.numpy().astype(np.float64)
    semantic_map /= semantic_map.max()
    semantic_rgb = np.stack([semantic_map] * 3, axis=-1)

    sepia_filter = np.array([
        [0.393, 0.769, 0.189],
        [0.349, 0.686, 0.168],
        [0.272, 0.534, 0.131],
    ])
    sepia_img = semantic_rgb.dot(sepia_filter.T)
    sepia_img /= sepia_img.max()
    return sepia_img


url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Quick smoke test against the sample image before launching the app.
sample = Image.open(requests.get(url, stream=True).raw)
greet(np.array(sample))

iface = gr.Interface(
    fn=greet,
    inputs=gr.Image(value=url),
    outputs="image",
)
iface.launch(debug=True)
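
# --- Optional: palette-based visualization ---------------------------------
# A minimal sketch of an alternative to the sepia rendering above: map each
# class ID in the semantic map to a distinct color. It is not wired into the
# app; the fixed seed and random palette are illustrative assumptions, while
# `model.config.id2label` is a real attribute of the loaded config.
def colorize_map(predicted_semantic_map):
    semantic_map = predicted_semantic_map.numpy()
    num_labels = len(model.config.id2label)
    rng = np.random.default_rng(0)  # deterministic palette across runs
    palette = rng.integers(0, 255, size=(num_labels, 3), dtype=np.uint8)
    return palette[semantic_map]  # (H, W) class IDs -> (H, W, 3) RGB image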