Spaces:
Runtime error
Runtime error
Update image_summary.py
Browse files
- image_summary.py +14 -4
image_summary.py
CHANGED
@@ -3,10 +3,17 @@ import gradio as gr
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
|
6 |
-
from transformers import ViTFeatureExtractor
|
7 |
-
feature_extractor = ViTFeatureExtractor()
|
|
|
|
|
|
|
|
|
8 |
# or, to load one that corresponds to a checkpoint on the hub:
|
9 |
-
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
|
|
|
|
|
|
|
10 |
|
11 |
from transformers import VisionEncoderDecoderModel
|
12 |
# initialize a vit-bert from a pretrained ViT and a pretrained BERT model. Note that the cross-attention layers will be randomly initialized
|
@@ -21,7 +28,10 @@ model = VisionEncoderDecoderModel.from_pretrained("./vit-bert")
|
|
21 |
#####################
|
22 |
from transformers import AutoTokenizer
|
23 |
repo_name = "ydshieh/vit-gpt2-coco-en"
|
24 |
-
feature_extractor = ViTFeatureExtractor.from_pretrained(repo_name)
|
|
|
|
|
|
|
25 |
tokenizer = AutoTokenizer.from_pretrained(repo_name)
|
26 |
model = VisionEncoderDecoderModel.from_pretrained(repo_name)
|
27 |
|
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
|
6 |
+
# from transformers import ViTFeatureExtractor
|
7 |
+
# feature_extractor = ViTFeatureExtractor()
|
8 |
+
from transformers import ViTImageProcessor
|
9 |
+
feature_extractor = ViTImageProcessor()
|
10 |
+
|
11 |
+
|
12 |
# or, to load one that corresponds to a checkpoint on the hub:
|
13 |
+
# feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
|
14 |
+
feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
|
15 |
+
|
16 |
+
|
17 |
|
18 |
from transformers import VisionEncoderDecoderModel
|
19 |
# initialize a vit-bert from a pretrained ViT and a pretrained BERT model. Note that the cross-attention layers will be randomly initialized
|
|
|
28 |
#####################
|
29 |
from transformers import AutoTokenizer
|
30 |
repo_name = "ydshieh/vit-gpt2-coco-en"
|
31 |
+
# feature_extractor = ViTFeatureExtractor.from_pretrained(repo_name)
|
32 |
+
feature_extractor = ViTImageProcessor.from_pretrained(repo_name)
|
33 |
+
|
34 |
+
|
35 |
tokenizer = AutoTokenizer.from_pretrained(repo_name)
|
36 |
model = VisionEncoderDecoderModel.from_pretrained(repo_name)
|
37 |
|