added git-base-coco model
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import AutoProcessor, BlipForConditionalGeneration
+from transformers import AutoProcessor, BlipForConditionalGeneration, AutoModelForCausalLM
 
 # from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel
 import torch
@@ -11,8 +11,8 @@ torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/0000000397
 torch.hub.download_url_to_file('https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png', 'stop_sign.png')
 torch.hub.download_url_to_file('https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg', 'astronaut.jpg')
 
-
-
+git_processor_base = AutoProcessor.from_pretrained("microsoft/git-base-coco")
+git_model_base = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")
 
 # git_processor_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
 # git_model_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
@@ -76,7 +76,7 @@ def generate_caption_coca(model, transform, image):
 
 
 def generate_captions(image):
-
+    caption_git_base = generate_caption(git_processor_base, git_model_base, image)
 
     # caption_git_large_coco = generate_caption(git_processor_large_coco, git_model_large_coco, image)
 
@@ -101,7 +101,7 @@ def generate_captions(image):
 
 examples = [["cats.jpg"], ["stop_sign.png"], ["astronaut.jpg"]]
 # outputs = [gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on COCO"), gr.outputs.Textbox(label="Caption generated by GIT-large fine-tuned on TextCaps"), gr.outputs.Textbox(label="Caption generated by BLIP-large"), gr.outputs.Textbox(label="Caption generated by CoCa"), gr.outputs.Textbox(label="Caption generated by BLIP-2 OPT 6.7b")]
-outputs = [gr.outputs.Textbox(label="Caption generated by BLIP-base")]
+outputs = [gr.outputs.Textbox(label="Caption generated by GIT-base fine-tuned on COCO"), gr.outputs.Textbox(label="Caption generated by BLIP-base")]
 
 title = "Interactive demo: comparing image captioning models"
 description = "Gradio Demo to compare GIT, BLIP, CoCa, and BLIP-2, 4 state-of-the-art vision+language models. To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
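
The added line in generate_captions calls a generate_caption helper that the hunks above do not show. As a rough sketch of what such a helper typically looks like for GIT- and BLIP-style models loaded via transformers (the body below is an assumption, not the Space's actual code):

def generate_caption(processor, model, image):
    # Hypothetical helper: preprocess the input image into pixel values.
    inputs = processor(images=image, return_tensors="pt")
    # Greedy autoregressive decoding; max_length kept short for captions.
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
    # Strip special tokens and return the first (only) decoded string.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

The same helper works for both models in this commit, since GitForCausalLM and BlipForConditionalGeneration both accept pixel_values in generate().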
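The diff also does not include the closing gr.Interface wiring, but given the outputs, examples, title, and description variables, the app presumably ends along these lines (the gr.inputs.Image component is an assumption, consistent with the legacy Gradio gr.outputs.Textbox calls above; note generate_captions must now return one string per Textbox, i.e. two):

interface = gr.Interface(
    fn=generate_captions,                # should return (caption_git_base, caption_blip_base)
    inputs=gr.inputs.Image(type="pil"),  # assumed input component
    outputs=outputs,
    examples=examples,
    title=title,
    description=description,
)
interface.launch()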