ipvikas committed on
Commit
f812ffa
·
verified ·
1 Parent(s): 199e195

Update image_summary.py

Browse files
Files changed (1) hide show
  1. image_summary.py +14 -4
image_summary.py CHANGED
@@ -3,10 +3,17 @@ import gradio as gr
3
  from PIL import Image
4
  import requests
5
 
6
- from transformers import ViTFeatureExtractor
7
- feature_extractor = ViTFeatureExtractor()
 
 
 
 
8
  # or, to load one that corresponds to a checkpoint on the hub:
9
- feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
 
 
 
10
 
11
  from transformers import VisionEncoderDecoderModel
12
  # initialize a vit-bert from a pretrained ViT and a pretrained BERT model. Note that the cross-attention layers will be randomly initialized
@@ -21,7 +28,10 @@ model = VisionEncoderDecoderModel.from_pretrained("./vit-bert")
21
  #####################
22
  from transformers import AutoTokenizer
23
  repo_name = "ydshieh/vit-gpt2-coco-en"
24
- feature_extractor = ViTFeatureExtractor.from_pretrained(repo_name)
 
 
 
25
  tokenizer = AutoTokenizer.from_pretrained(repo_name)
26
  model = VisionEncoderDecoderModel.from_pretrained(repo_name)
27
 
 
3
  from PIL import Image
4
  import requests
5
 
6
+ # from transformers import ViTFeatureExtractor
7
+ # feature_extractor = ViTFeatureExtractor()
8
+ from transformers import ViTImageProcessor
9
+ feature_extractor = ViTImageProcessor()
10
+
11
+
12
  # or, to load one that corresponds to a checkpoint on the hub:
13
+ # feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
14
+ feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
15
+
16
+
17
 
18
  from transformers import VisionEncoderDecoderModel
19
  # initialize a vit-bert from a pretrained ViT and a pretrained BERT model. Note that the cross-attention layers will be randomly initialized
 
28
  #####################
29
  from transformers import AutoTokenizer
30
  repo_name = "ydshieh/vit-gpt2-coco-en"
31
+ # feature_extractor = ViTFeatureExtractor.from_pretrained(repo_name)
32
+ feature_extractor = ViTImageProcessor.from_pretrained(repo_name)
33
+
34
+
35
  tokenizer = AutoTokenizer.from_pretrained(repo_name)
36
  model = VisionEncoderDecoderModel.from_pretrained(repo_name)
37