#!/usr/bin/env python3
import gradio as gr
import os
from clip_interrogator import Config, Interrogator
from huggingface_hub import hf_hub_download
from share_btn import community_icon_html, loading_icon_html, share_js
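# share_btn is a local helper module (presumably shipped alongside this app)
# providing the HTML snippets and client-side JS for the "Share to community" widget.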

MODELS = ['ViT-L (best for Stable Diffusion 1.*)', 'ViT-H (best for Stable Diffusion 2.*)']

# download preprocessed cache files
PREPROCESS_FILES = [
    'ViT-H-14_laion2b_s32b_b79k_artists.pkl',
    'ViT-H-14_laion2b_s32b_b79k_flavors.pkl',
    'ViT-H-14_laion2b_s32b_b79k_mediums.pkl',
    'ViT-H-14_laion2b_s32b_b79k_movements.pkl',
    'ViT-H-14_laion2b_s32b_b79k_trendings.pkl',
    'ViT-L-14_openai_artists.pkl',
    'ViT-L-14_openai_flavors.pkl',
    'ViT-L-14_openai_mediums.pkl',
    'ViT-L-14_openai_movements.pkl',
    'ViT-L-14_openai_trendings.pkl',
]
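# Each .pkl caches precomputed CLIP text embeddings for one label table
# (artists, flavors, mediums, movements, trendings) per CLIP model, so the
# Interrogator can skip re-embedding the label lists at startup.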
print("Download preprocessed cache files...")
for file in PREPROCESS_FILES:
    path = hf_hub_download(repo_id="pharma/ci-preprocess", filename=file, cache_dir="cache")
    cache_path = os.path.dirname(path)  # all files come from the same repo snapshot, so this is one directory


# load BLIP and ViT-L https://huggingface.co/openai/clip-vit-large-patch14
config = Config(cache_path=cache_path, clip_model_path="cache", clip_model_name="ViT-L-14/openai")
ci_vitl = Interrogator(config)
ci_vitl.clip_model = ci_vitl.clip_model.to("cpu")

# load ViT-H https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K,
# reusing the BLIP captioner already loaded above instead of loading it twice
config.blip_model = ci_vitl.blip_model
config.clip_model_name = "ViT-H-14/laion2b_s32b_b79k"
ci_vith = Interrogator(config)
ci_vith.clip_model = ci_vith.clip_model.to("cpu")
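# Both interrogators stay resident with their CLIP weights parked on the CPU;
# inference() moves only the selected model onto the GPU per request, so the
# two CLIP models never occupy VRAM at the same time.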


def inference(image, clip_model_name, mode):

    # move selected model to GPU and other model to CPU
    if clip_model_name == MODELS[0]:
        ci_vith.clip_model = ci_vith.clip_model.to("cpu")
        ci_vitl.clip_model = ci_vitl.clip_model.to(ci_vitl.device)
        ci = ci_vitl
    else:
        ci_vitl.clip_model = ci_vitl.clip_model.to("cpu")
        ci_vith.clip_model = ci_vith.clip_model.to(ci_vith.device)
        ci = ci_vith

    ci.config.blip_num_beams = 64   # wider beam search for the BLIP caption
    ci.config.chunk_size = 2048     # batch size for ranking CLIP text embeddings
    ci.config.flavor_intermediate_count = 2048 if clip_model_name == MODELS[0] else 1024  # fewer candidates for the heavier ViT-H

    image = image.convert('RGB')
    if mode == 'best':
        prompt = ci.interrogate(image)
    elif mode == 'classic':  # not selectable in the UI below, kept for completeness
        prompt = ci.interrogate_classic(image)
    else:
        prompt = ci.interrogate_fast(image)

    return prompt, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

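# Hypothetical local smoke test (not part of the app; assumes example01.jpg
# exists next to this script):
#   from PIL import Image
#   print(inference(Image.open('example01.jpg'), MODELS[0], 'fast')[0])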

TITLE = """
    <div style="text-align: center; max-width: 650px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px;">
            CLIP Interrogator
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Want some ideas for your next remodel?<br>The Remodel Dreamer generates a prompt from a photo of your current home or room that you can use to dream up amazing new spaces!
        </p>
    </div>
"""

ARTICLE = """
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
  
</div>
"""

CSS = '''
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
    animation: spin 1s linear infinite;
}
@keyframes spin {
    from { transform: rotate(0deg); }
    to { transform: rotate(360deg); }
}
#share-btn-container {
    display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
}
#share-btn {
    all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;
}
#share-btn * {
    all: unset;
}
#share-btn-container div:nth-child(-n+2){
    width: auto !important;
    min-height: 0px !important;
}
#share-btn-container .wrap {
    display: none !important;
}
'''

with gr.Blocks(css=CSS) as block:
    with gr.Column(elem_id="col-container"):
        gr.HTML(TITLE)

        input_image = gr.Image(type='pil', elem_id="input-img")
        input_model = gr.Dropdown(MODELS, value=MODELS[0], label='CLIP Model')
        input_mode = gr.Radio(['best', 'fast'], value='best', label='Mode')
        submit_btn = gr.Button("Submit")
        output_text = gr.Textbox(label="Output", elem_id="output-txt")

        with gr.Group(elem_id="share-btn-container"):
            community_icon = gr.HTML(community_icon_html, visible=False)
            loading_icon = gr.HTML(loading_icon_html, visible=False)
            share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)

        examples = [['example01.jpg', MODELS[0], 'best'], ['example02.jpg', MODELS[0], 'best']]
        ex = gr.Examples(
            examples=examples,
            fn=inference,
            inputs=[input_image, input_model, input_mode],
            outputs=[output_text, share_button, community_icon, loading_icon],
            cache_examples=True,
            run_on_click=True
        )
        ex.dataset.headers = [""]  # blank out the examples table header
        
        gr.HTML(ARTICLE)

    submit_btn.click(
        fn=inference, 
        inputs=[input_image, input_model, input_mode], 
        outputs=[output_text, share_button, community_icon, loading_icon]
    )
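    # inference() returns gr.update(visible=True) for the share button and icons,
    # so the share widgets only appear after the first successful run.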
    share_button.click(None, [], [], _js=share_js)  # _js runs share_js in the browser (Gradio 3.x client-side hook)

block.queue(max_size=32).launch(show_api=False)