File size: 6,876 Bytes
c3bc979 2773523 c3bc979 2773523 c3bc979 953e4d8 c3bc979 70d5b34 c3bc979 70d5b34 7f690a1 70d5b34 953e4d8 c3bc979 89d8ace 953e4d8 c3bc979 953e4d8 c3bc979 89d8ace 953e4d8 c3bc979 953e4d8 c3bc979 953e4d8 c3bc979 953e4d8 c3bc979 953e4d8 c3bc979 953e4d8 c3bc979 89d8ace 953e4d8 c3bc979 953e4d8 c3bc979 953e4d8 c38d0f4 c3bc979 70d5b34 953e4d8 70d5b34 953e4d8 c3bc979 70d5b34 c3bc979 70d5b34 53fbd3a c3bc979 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
from __future__ import annotations
import os
import gradio as gr
import torch
from gradio_client import Client
from gradio_client.client import Job
# Markdown heading passed to gr.Markdown when the page is built.
DESCRIPTION = "# Comparing image captioning models"

# (link text, Space id) for every upstream Space, in display order.
_ORIGINAL_SPACES = (
    ("BLIP fine-tuned for long captions", "unography/image-captioning-with-longcap"),
    ("GIT-large fine-tuned on COCO", "library-samples/image-captioning-with-git"),
    ("BLIP-large", "library-samples/image-captioning-with-blip"),
    ("BLIP-2 OPT 6.7B", "merve/BLIP2-with-transformers"),
    ("BLIP-2 T5-XXL", "hysts/BLIP2-with-transformers"),
    ("InstructBLIP", "library-samples/InstructBLIP"),
    ("Fuyu-8B", "adept/fuyu-8b-demo"),
)

# Markdown bullet list of links; every bullet (including the last) ends with a newline.
ORIGINAL_SPACE_INFO = "".join(
    f"- [{link_text}](https://huggingface.co/spaces/{space_id})\n" for link_text, space_id in _ORIGINAL_SPACES
)
# Example images fetched at import time: (source URL, destination file name).
# The destination names are relative paths and are the same names listed in
# the gr.Examples block of this file.
_EXAMPLE_IMAGE_SOURCES = (
    ("http://images.cocodataset.org/val2017/000000039769.jpg", "cats.jpg"),
    ("https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png", "stop_sign.png"),
    ("https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg", "astronaut.jpg"),
)
for _source_url, _local_name in _EXAMPLE_IMAGE_SOURCES:
    torch.hub.download_url_to_file(_source_url, _local_name)
def generate_caption_longcap(image_path: str, return_job: bool = False) -> str | Job:
    """Request a caption from the long-caption BLIP Space.

    Connects to the ``unography/image-captioning-with-longcap`` Space and
    calls its ``/caption`` endpoint with ``image_path``.

    Args:
        image_path: Path of the image to caption.
        return_job: When true the call goes through ``Client.submit``;
            otherwise through ``Client.predict``.

    Returns:
        Whatever the selected client method returns. If connecting or calling
        raises, a ``gr.Warning`` is shown and ``""`` is returned instead.
    """
    try:
        client = Client("unography/image-captioning-with-longcap")
        fn = client.submit if return_job else client.predict
        return fn(image_path, api_name="/caption")
    except Exception:
        # The message previously named "GIT-large" (copied from the GIT
        # helper), which pointed users at the wrong Space.
        gr.Warning("The BLIP long-caption Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_git(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``hysts/image-captioning-with-git`` Space for a caption.

    Args:
        image_path: Image path forwarded to the Space's ``/caption`` endpoint.
        return_job: Selects ``Client.submit`` (truthy) or ``Client.predict``
            (falsy) for the call.

    Returns:
        Whatever the selected client method returns. If connecting or calling
        raises, a ``gr.Warning`` is issued and ``""`` is returned instead.
    """
    try:
        space = Client("hysts/image-captioning-with-git")
        if return_job:
            return space.submit(image_path, api_name="/caption")
        return space.predict(image_path, api_name="/caption")
    except Exception:
        gr.Warning("The GIT-large Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``hysts/image-captioning-with-blip`` Space for a caption.

    The image path is sent to the ``/caption`` endpoint together with the
    fixed text ``"A picture of"``.

    Args:
        image_path: Path of the image to caption.
        return_job: Use ``Client.submit`` when true, ``Client.predict`` otherwise.

    Returns:
        The value produced by the chosen client method, or ``""`` (after a
        ``gr.Warning``) if any exception was raised.
    """
    try:
        remote = Client("hysts/image-captioning-with-blip")
        if return_job:
            call = remote.submit
        else:
            call = remote.predict
        outcome = call(image_path, "A picture of", api_name="/caption")
        return outcome
    except Exception:
        gr.Warning("The BLIP-large Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip2_opt(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``merve/BLIP2-with-transformers`` Space for a caption.

    Args:
        image_path: Path of the image to caption.
        return_job: Use ``Client.submit`` when true, ``Client.predict`` otherwise.

    Returns:
        The value produced by the chosen client method, or ``""`` (after a
        ``gr.Warning``) if any exception was raised.
    """
    # Positional inputs of the Space's "/caption" endpoint.
    request_args = (
        image_path,
        "Beam search",
        1,  # temperature
        1,  # length penalty
        1.5,  # repetition penalty
    )
    try:
        space = Client("merve/BLIP2-with-transformers")
        method_name = "submit" if return_job else "predict"
        return getattr(space, method_name)(*request_args, api_name="/caption")
    except Exception:
        gr.Warning("The BLIP2 OPT6.7B Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip2_t5xxl(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``hysts/BLIP2-with-transformers`` Space for a caption.

    Args:
        image_path: Path of the image to caption.
        return_job: Use ``Client.submit`` when true, ``Client.predict`` otherwise.

    Returns:
        The value produced by the chosen client method, or ``""`` (after a
        ``gr.Warning``) if any exception was raised.
    """
    # Fixed generation settings sent after the image path, in endpoint order.
    generation_settings = [
        "Beam search",
        1,  # temperature
        1,  # length penalty
        1.5,  # repetition penalty
        50,  # max length
        1,  # min length
        5,  # number of beams
        0.9,  # top p
    ]
    try:
        remote = Client("hysts/BLIP2-with-transformers")
        if return_job:
            return remote.submit(image_path, *generation_settings, api_name="/caption")
        return remote.predict(image_path, *generation_settings, api_name="/caption")
    except Exception:
        gr.Warning("The BLIP2 T5-XXL Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_instructblip(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``hysts/InstructBLIP`` Space to describe the image.

    The request goes to the Space's ``/run`` endpoint with the prompt
    ``"Describe the image."`` and a fixed set of generation settings.

    Args:
        image_path: Path of the image to caption.
        return_job: Use ``Client.submit`` when true, ``Client.predict`` otherwise.

    Returns:
        The value produced by the chosen client method, or ``""`` (after a
        ``gr.Warning``) if any exception was raised.
    """
    positional_inputs = (
        image_path,
        "Describe the image.",
        "Beam search",
        5,  # beam size
        256,  # max length
        1,  # min length
        0.9,  # top p
        1.5,  # repetition penalty
        1.0,  # length penalty
        1.0,  # temperature
    )
    try:
        space = Client("hysts/InstructBLIP")
        invoke = space.submit if return_job else space.predict
        response = invoke(*positional_inputs, api_name="/run")
        return response
    except Exception:
        gr.Warning("The InstructBLIP Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_fuyu(image_path: str, return_job: bool = False) -> str | Job:
    """Ask the ``adept/fuyu-8b-demo`` Space for a COCO-style caption.

    Unlike the other helpers in this file, the endpoint is addressed by
    ``fn_index=3`` rather than by an ``api_name``.

    Args:
        image_path: Path of the image to caption.
        return_job: Use ``Client.submit`` when true, ``Client.predict`` otherwise.

    Returns:
        The value produced by the chosen client method, or ``""`` (after a
        ``gr.Warning``) if any exception was raised.
    """
    prompt = "Generate a coco style caption.\n"
    try:
        remote = Client("adept/fuyu-8b-demo")
        if return_job:
            return remote.submit(image_path, prompt, fn_index=3)
        return remote.predict(image_path, prompt, fn_index=3)
    except Exception:
        gr.Warning("The Fuyu-8B Space is currently unavailable. Please try again later.")
        return ""
def generate_captions(image_path: str) -> tuple[str, str, str, str, str, str, str]:
    """Caption one image with every model and return the captions in UI order.

    Each per-model helper is called with ``return_job=True``; all requests are
    submitted before any result is collected. A helper that could not submit
    returns ``""`` and contributes an empty caption. A job whose ``result()``
    raises also contributes an empty caption (with a ``gr.Warning``), so one
    failing Space no longer discards the captions from the others.

    Args:
        image_path: Path of the image to caption.

    Returns:
        Seven entries ordered long-caption BLIP, GIT, BLIP, BLIP-2 OPT,
        BLIP-2 T5-XXL, InstructBLIP, Fuyu.
    """
    submitters = (
        ("BLIP long-caption", generate_caption_longcap),
        ("GIT-large", generate_caption_git),
        ("BLIP-large", generate_caption_blip),
        ("BLIP2 OPT6.7B", generate_caption_blip2_opt),
        ("BLIP2 T5-XXL", generate_caption_blip2_t5xxl),
        ("InstructBLIP", generate_caption_instructblip),
        ("Fuyu-8B", generate_caption_fuyu),
    )
    # Submit everything up front; results are only awaited in the loop below.
    jobs = [(label, submit(image_path, return_job=True)) for label, submit in submitters]

    captions = []
    for label, job in jobs:
        if not job:
            # The helper already warned and returned "" instead of a job.
            captions.append("")
            continue
        try:
            captions.append(job.result())
        except Exception:
            # Failures that surface while waiting for the result are not
            # covered by the helpers' own try/except, so handle them here.
            gr.Warning(f"The {label} Space failed to return a caption. Please try again later.")
            captions.append("")
    return tuple(captions)
# NOTE(review): the nesting below is reconstructed; the source this was taken
# from carried no indentation. Confirm the Row/Column layout against the app.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Input side: the image (passed to the handler as a file path) and the trigger button.
        with gr.Column():
            input_image = gr.Image(type="filepath")
            run_button = gr.Button("Caption")
        # Output side: one textbox per captioning model.
        with gr.Column():
            out_longcap = gr.Textbox(label="BLIP fine-tuned for long captions")
            out_git = gr.Textbox(label="GIT-large fine-tuned on COCO")
            out_blip = gr.Textbox(label="BLIP-large")
            out_blip2_opt = gr.Textbox(label="BLIP-2 OPT 6.7B")
            out_blip2_t5xxl = gr.Textbox(label="BLIP-2 T5-XXL")
            out_instructblip = gr.Textbox(label="InstructBLIP")
            out_fuyu = gr.Textbox(label="Fuyu-8B")

    # Same order as the jobs list built inside generate_captions.
    outputs = [out_longcap, out_git, out_blip, out_blip2_opt, out_blip2_t5xxl, out_instructblip, out_fuyu]

    gr.Examples(
        examples=["cats.jpg", "stop_sign.png", "astronaut.jpg"],
        inputs=input_image,
        outputs=outputs,
        fn=generate_captions,
        # Example caching is opt-in through the CACHE_EXAMPLES environment variable.
        cache_examples=os.environ.get("CACHE_EXAMPLES") == "1",
    )

    with gr.Accordion(label="The original Spaces can be found here:", open=False):
        gr.Markdown(ORIGINAL_SPACE_INFO)

    run_button.click(fn=generate_captions, inputs=input_image, outputs=outputs, api_name="caption")
if __name__ == "__main__":
    # Configure the queue with max_size=20, then launch whatever queue() returned.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch()
|