Update app.py
Browse files
app.py
CHANGED
@@ -12,34 +12,39 @@ pipes = {
|
|
12 |
"ViT/H-14": pipeline("zero-shot-image-classification", model="OFA-Sys/chinese-clip-vit-huge-patch14"),
|
13 |
}
|
14 |
inputs = [
|
15 |
-
gr.inputs.Image(type='pil'
|
16 |
-
|
|
|
|
|
17 |
gr.inputs.Radio(choices=[
|
18 |
"ViT/B-16",
|
19 |
"ViT/L-14",
|
20 |
"ViT/L-14@336px",
|
21 |
"ViT/H-14",
|
22 |
-
], type="value", default="ViT/B-16", label="Model"),
|
|
|
|
|
|
|
23 |
]
|
24 |
images="festival.jpg"
|
25 |
|
26 |
-
def shot(image, labels_text, model_name):
|
27 |
labels = [label.strip(" ") for label in labels_text.strip(" ").split(",")]
|
28 |
res = pipes[model_name](images=image,
|
29 |
candidate_labels=labels,
|
30 |
-
hypothesis_template=
|
31 |
return {dic["label"]: dic["score"] for dic in res}
|
32 |
|
33 |
iface = gr.Interface(shot,
|
34 |
inputs,
|
35 |
"label",
|
36 |
-
examples=[["festival.jpg", "灯笼, 鞭炮, 对联", "ViT/B-16"],
|
37 |
-
["cat-dog-music.png", "音乐表演, 体育运动", "ViT/B-16"],
|
38 |
-
["football-match.jpg", "梅西, C罗, 马奎尔", "ViT/B-16"]],
|
39 |
description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
|
40 |
Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
|
41 |
Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
|
42 |
-
To play with this demo, add a picture and a list of labels in Chinese separated by commas.
|
43 |
You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>""",
|
44 |
title="Zero-shot Image Classification (中文零样本图像分类)")
|
45 |
|
|
|
12 |
"ViT/H-14": pipeline("zero-shot-image-classification", model="OFA-Sys/chinese-clip-vit-huge-patch14"),
|
13 |
}
|
14 |
# Gradio input components. Their order matches the parameters of `shot`
# (image, labels_text, model_name, hypothesis_template).
inputs = [
    # Picture to classify; requested from Gradio with type "pil".
    gr.inputs.Image(type="pil", label="Image 输入图片"),
    # Free-text box holding the candidate labels.
    gr.inputs.Textbox(lines=1, label="Candidate Labels 候选分类标签"),
    # Which entry of `pipes` to run; the choices mirror that dict's keys.
    gr.inputs.Radio(
        choices=["ViT/B-16", "ViT/L-14", "ViT/L-14@336px", "ViT/H-14"],
        type="value",
        default="ViT/B-16",
        label="Model 模型规模",
    ),
    # Prompt template; "{}" marks where each candidate label is inserted.
    gr.inputs.Textbox(
        lines=1,
        label="Prompt Template Prompt模板 ({}指代候选标签)",
        default="一张{}的图片。",
    ),
]

# NOTE(review): not referenced by any code visible in this section; kept
# because other parts of the file may rely on it.
images = "festival.jpg"
|
30 |
|
31 |
+
def shot(image, labels_text, model_name, hypothesis_template):
    """Score ``image`` against a user-supplied list of candidate labels.

    Args:
        image: Image forwarded unchanged to the selected pipeline as
            ``images``.
        labels_text: Candidate labels in a single string, separated by
            commas. Both the ASCII comma (",") and the full-width comma
            ("，") are accepted as separators. Surrounding whitespace is
            trimmed, empty entries are dropped, and repeated labels are
            kept only once (first occurrence wins).
        model_name: Key selecting the pipeline in the module-level ``pipes``
            mapping.
        hypothesis_template: Prompt template forwarded unchanged to the
            pipeline as ``hypothesis_template``.

    Returns:
        A dict mapping each label reported by the pipeline to its score.

    Raises:
        ValueError: If ``labels_text`` contains no non-empty label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    # Users typing Chinese often enter the full-width comma; treat it like
    # the ASCII one so the text is not swallowed as one giant label.
    normalized = (labels_text or "").replace("，", ",")

    # str.strip() with no argument also removes tabs, newlines and the
    # full-width space, which strip(" ") would leave attached to the label.
    stripped = (piece.strip() for piece in normalized.split(","))

    # dict.fromkeys drops duplicates while preserving the user's order.
    # Duplicates would otherwise be scored separately and then collapse into
    # a single key of the returned dict.
    labels = list(dict.fromkeys(label for label in stripped if label))

    if not labels:
        # Without this guard the model would be asked to score the empty
        # label "", which produces a meaningless result.
        raise ValueError(
            "Please enter at least one candidate label (请输入至少一个候选标签)."
        )

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {dic["label"]: dic["score"] for dic in res}
|
37 |
|
38 |
# Assemble the demo UI: `shot` is the callback, `inputs` supplies its
# arguments, and the result is rendered with the "label" output component.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    # Every example row uses the same model and prompt template; only the
    # image file and the candidate labels vary.
    examples=[
        [image_file, label_list, "ViT/B-16", "一张{}的图片。"]
        for image_file, label_list in (
            ("festival.jpg", "灯笼, 鞭炮, 对联"),
            ("cat-dog-music.png", "音乐表演, 体育运动"),
            ("football-match.jpg", "梅西, C罗, 马奎尔"),
        )
    ],
    description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片，并输入多个分类标签，用英文逗号分隔。可点击页面最下方示例参考。<br>
You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>""",
    title="Zero-shot Image Classification (中文零样本图像分类)",
)
|
50 |
|