Srujan111 committed
Commit b0ddd97 · Parent: 156c7cc

Update app.py

Files changed (1)
  1. app.py +31 -3
app.py CHANGED
@@ -1,5 +1,33 @@
-# Load model directly
-from transformers import AutoTokenizer, AutoModel
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+import torch
+from PIL import Image
 
+model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
-model = AutoModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+
+
+max_length = 16
+num_beams = 4
+gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+def predict_step(image_paths):
+    images = []
+    for image_path in image_paths:
+        i_image = Image.open(image_path)
+        if i_image.mode != "RGB":
+            i_image = i_image.convert(mode="RGB")
+
+        images.append(i_image)
+
+    pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
+    pixel_values = pixel_values.to(device)
+
+    output_ids = model.generate(pixel_values, **gen_kwargs)
+
+    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    preds = [pred.strip() for pred in preds]
+    return preds
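
Note on the change: the previous code loaded this checkpoint with the generic AutoModel class, which does not map VisionEncoderDecoderConfig and so could not provide a generate() method; the new code loads the matching VisionEncoderDecoderModel plus a ViTImageProcessor and wraps caption generation in predict_step. As a minimal usage sketch, not part of this commit, predict_step might be called like the following; "sample.jpg" is a hypothetical placeholder path, substitute any local image file:

# Hypothetical usage of predict_step (illustration only, not in the commit).
# "sample.jpg" is a placeholder; any RGB-convertible image file works.
if __name__ == "__main__":
    captions = predict_step(["sample.jpg"])
    print(captions)  # a list with one generated caption per input image

Because predict_step accepts a list of paths and the feature extractor batches them into a single pixel_values tensor, several images can be captioned in one generate() call.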