from transformers.utils import logging
from transformers import AutoProcessor
from transformers import CLIPModel
import gradio as gr
import torch
import requests
from PIL import Image

logging.set_verbosity_error()

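# Load the pretrained CLIP model and its matching processor (image preprocessing + text tokenization)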
model = CLIPModel.from_pretrained(
    "openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained(
    "openai/clip-vit-large-patch14")

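# Classify the image against the comma-separated candidate labels and return the best match as HTML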
def process_image(input_type, image_url, image_upload, labels):
    if input_type == "URL":
        raw_image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
    else:
        raw_image = image_upload

    labels = [label.strip() for label in labels.split(",")]
    print(labels)

    inputs = processor(text=labels, images=raw_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    # Softmax over the per-label logits gives one probability per candidate label
    probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()
    for label, prob in zip(labels, probs):
        print(f"label: {label} - probability: {prob:.4f}")

    answer = labels[probs.index(max(probs))].capitalize()
    print(answer)
    answer = (
        f"""<div>
              <h2 style='text-align: center; font-size: 30px; color: blue;'>The detected object is </h2>
              <h1 style='text-align: center; font-size: 50px; color: orange;'>{answer}</h1> 
              <h2 style='text-align: center; font-size: 30px; color: blue;'> with a probability of </h2>
              <h1 style='text-align: center; font-size: 50px; color: orange;'>{max(probs)*100:.2f}%</h1>
        </div>"""
    )
    return answer

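# Fetch and decode an image from a URL so it can be previewed in the UI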
def display_image_from_url(image_url):
    if image_url:
        image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')
        return image
    return None

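# Show/hide the URL and upload widgets depending on the selected input type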
def toggle_inputs(input_type):
    if input_type == "URL":
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
    else:
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)

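# Default sample image and candidate labels shown when the app loads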
sample_image = Image.open("./huggingface_friends.jpg")
sample_labels = "a photo of a man, a photo of a dog, cats, two cats, group of friends dining, food, people eating, men and women"

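# Build the Gradio UI: input-type selector, image widgets, label box, and result panel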
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Determine the best label for a picture out of a set of possible labels - test & demo app by Srinivas.V..
    Paste the URL of an image or upload an image, type in your label choices for the image,
    separated by commas (','), and submit.
    """)

    input_type = gr.Radio(choices=["URL", "Upload"], label="Input Type")
    image_url = gr.Textbox(value='https://huggingface.co/spaces/vsrinivas/Determine_Best_Label_from_Set_of_Given_Labels/resolve/main/huggingface_friends.jpg', label="Type-in/ Paste Image URL", visible=False)
    url_image = gr.Image(value=sample_image, type="pil", label="URL Image", visible=False)
    image_upload = gr.Image(value=sample_image, type="pil", label="Uploaded Image", visible=False)
    labels = gr.Textbox(value=sample_labels, label="Type in your labels separated by comma (',')", visible=False, lines=2)

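    # Toggle widget visibility when the input type changes and preview images fetched from a URL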
    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[image_url, url_image, image_upload, labels])
    image_url.change(fn=display_image_from_url, inputs=image_url, outputs=url_image)

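    # The Submit button runs the classifier and renders the HTML verdict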
    submit_btn = gr.Button("Submit")
    processed_image = gr.HTML(label="The Answer")
    submit_btn.click(fn=process_image, inputs=[input_type, image_url, image_upload, labels], outputs=processed_image)

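# Launch the app; share=True exposes a temporary public URL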
demo.launch(debug=True, share=True)