from transformers.utils import logging
from transformers import AutoProcessor
from transformers import CLIPModel
import gradio as gr
import torch
import requests
from PIL import Image

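# Show only errors from transformers; its informational messages would
# otherwise clutter the console.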
logging.set_verbosity_error()

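# Load the pretrained CLIP checkpoint and its matching processor
# (tokenizer plus image preprocessor) from the Hugging Face Hub.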
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")


def process_image(input_type, image_url, image_upload, labels):
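    """Score an image against the candidate labels with CLIP and return an HTML verdict."""
    # Load the image either from the URL or from the uploaded PIL image.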
    if input_type == "URL":
        raw_image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    else:
        raw_image = image_upload

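    # Turn the comma-separated label string into a clean list of candidates.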
    labels = [l.strip() for l in labels.split(",")]
    print(labels)

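    # Preprocess the labels and image, run CLIP, and softmax the per-label
    # logits into probabilities; torch.no_grad() skips gradient tracking,
    # which is unnecessary at inference time.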
    inputs = processor(text=labels, images=raw_image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()
    for label, prob in zip(labels, probs):
        print(f"label: {label} - probability of detected object being {prob * 100:.2f}%")

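    # Pick the label with the highest probability as the answer.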
    answer = labels[probs.index(max(probs))].capitalize()
    print(answer)
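    # Wrap the verdict in styled HTML for the gr.HTML output component.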
    answer = (
        f"""<div>
        <h2 style='text-align: center; font-size: 30px; color: blue;'>The detected object is </h2>
        <h1 style='text-align: center; font-size: 50px; color: orange;'>{answer}</h1>
        <h2 style='text-align: center; font-size: 30px; color: blue;'> with a probability of </h2>
        <h1 style='text-align: center; font-size: 50px; color: orange;'>{max(probs) * 100:.2f}%</h1>
        </div>"""
    )
    return answer


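# Live preview: fetch and return the image whenever the URL textbox changes.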
def display_image_from_url(image_url):
    if image_url:
        image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
        return image
    return None


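# Toggle widget visibility based on the selected input type; the four updates
# map to (image_url, url_image, image_upload, labels) in that order.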
def toggle_inputs(input_type):
    if input_type == "URL":
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
    else:
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)


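# Defaults shown when the app starts; huggingface_friends.jpg is expected to
# sit next to this script.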
sample_image = Image.open("./huggingface_friends.jpg")
sample_labels = "a photo of a man, a photo of a dog, cats, two cats, group of friends dining, food, people eating, men and women"

with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Determine the best label for a picture out of a set of possible labels - test & demo app by Srinivas.V..
        Paste either the URL of an image or upload the image, type in your label choices for the image,
        separated by commas (','), and submit.
        """)

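    # Input widgets; everything except the radio starts hidden until an
    # input type is chosen.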
    input_type = gr.Radio(choices=["URL", "Upload"], label="Input Type")
    image_url = gr.Textbox(value='https://huggingface.co/spaces/vsrinivas/Determine_Best_Label_from_Set_of_Given_Labels/resolve/main/huggingface_friends.jpg', label="Type-in/ Paste Image URL", visible=False)
    url_image = gr.Image(value=sample_image, type="pil", label="URL Image", visible=False)
    image_upload = gr.Image(value=sample_image, type="pil", label="Uploaded Image", visible=False)
    labels = gr.Textbox(value=sample_labels, label="Type in your labels separated by commas (',')", visible=False, lines=2)

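    # Wire up interactivity: switching the input type toggles widget
    # visibility, and editing the URL refreshes the preview image.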
    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[image_url, url_image, image_upload, labels])
    image_url.change(fn=display_image_from_url, inputs=image_url, outputs=url_image)

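    # Submit runs CLIP on the chosen image and renders the HTML verdict.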
    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="The Answer")
    submit_btn.click(fn=process_image, inputs=[input_type, image_url, image_upload, labels], outputs=processed_image)

demo.launch(debug=True, share=True)