NourFakih committed on
Commit
003791f
·
verified ·
1 Parent(s): 83ba2ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -4,7 +4,7 @@ import zipfile
4
  import tempfile
5
  import base64
6
  from PIL import Image
7
- from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
8
  import pandas as pd
9
  from nltk.corpus import wordnet
10
  import spacy
@@ -28,8 +28,16 @@ nlp = spacy.load("en_core_web_sm")
28
  # Load the pre-trained model for image captioning
29
  model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
30
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
31
- feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
32
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
 
 
33
 
34
  def generate_caption(image):
35
  pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
 
4
  import tempfile
5
  import base64
6
  from PIL import Image
7
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
8
  import pandas as pd
9
  from nltk.corpus import wordnet
10
  import spacy
 
28
  # Load the pre-trained model for image captioning
29
  model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
30
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
31
+ feature_extractor = ViTImageProcessor.from_pretrained(model_name)
32
  tokenizer = AutoTokenizer.from_pretrained(model_name)
33
+ # GPT2 only has bos/eos tokens but not decoder_start/pad tokens
34
+ tokenizer.pad_token = tokenizer.eos_token
35
+
36
+ # update the model config
37
+ model.config.eos_token_id = tokenizer.eos_token_id
38
+ model.config.decoder_start_token_id = tokenizer.bos_token_id
39
+ model.config.pad_token_id = tokenizer.pad_token_id
40
+
41
 
42
  def generate_caption(image):
43
  pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values