Makhinur committed
Commit 1cd43eb · verified · 1 Parent(s): c79a839

Update app.py

Files changed (1)
  1. app.py +39 -4
app.py CHANGED
@@ -1,6 +1,41 @@
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
+
+ model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+ vit_feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
+ tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
+
+ def vit2distilgpt2(img):
+     pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
+     generated_ids = model.generate(pixel_values.to('cpu'), num_beams=5, num_return_sequences=3)
+     generated_sentences = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+
+     return generated_sentences
+
import gradio as gr

- gr.load(
-     "models/Salesforce/blip-image-captioning-base",
-     provider="hf-inference",
- ).launch()
+ inputs = [
+     gr.inputs.Image(type="pil", label="Original Images")
+ ]
+
+ outputs = [
+     gr.outputs.Textbox(label="Caption 1"),
+     gr.outputs.Textbox(label="Caption 2"),
+     gr.outputs.Textbox(label="Caption 3"),
+ ]
+
+ title = "Image Captioning using ViT + GPT2"
+ description = "ViT and GPT2 are used to generate an image caption for the uploaded image. The COCO dataset was used for training."
+ examples = [
+     ["Image1.png"],
+     ["Image2.png"],
+     ["Image3.png"]
+ ]
+
+ gr.Interface(
+     vit2distilgpt2,
+     inputs,
+     outputs,
+     title=title,
+     description=description,
+     examples=examples,
+     theme="huggingface",
+ ).launch(debug=True, enable_queue=True)
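
As a quick sanity check of the new captioning code outside the Gradio UI, the generation step can be run as a standalone script. A minimal sketch, assuming transformers, torch, and Pillow are installed and that one of the bundled example images (e.g. Image1.png) is available locally:

    from PIL import Image
    from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast

    # Same checkpoints as app.py: a ViT encoder paired with a GPT-2 decoder fine-tuned for captioning
    model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
    tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")

    # convert("RGB") guards against RGBA or grayscale inputs before feature extraction
    image = Image.open("Image1.png").convert("RGB")
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values

    # Beam search with three returned sequences, mirroring vit2distilgpt2 in app.py
    generated_ids = model.generate(pixel_values, num_beams=5, num_return_sequences=3)
    for i, caption in enumerate(tokenizer.batch_decode(generated_ids, skip_special_tokens=True), 1):
        print(f"Caption {i}: {caption}")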
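
One caveat on the interface code: gr.inputs.Image, gr.outputs.Textbox, the string theme="huggingface", and launch(enable_queue=True) belong to the legacy Gradio 2/3 API and no longer exist in Gradio 4, so this app only runs against an older pinned Gradio. A rough Gradio 4.x equivalent, offered as a sketch rather than as part of this commit (it assumes vit2distilgpt2, title, description, and examples are defined as above, and drops the retired string theme):

    import gradio as gr

    demo = gr.Interface(
        fn=vit2distilgpt2,
        inputs=gr.Image(type="pil", label="Original Images"),
        outputs=[gr.Textbox(label=f"Caption {i}") for i in (1, 2, 3)],
        title=title,
        description=description,
        examples=examples,
    )
    # queue() replaces the removed enable_queue=True launch flag
    demo.queue().launch(debug=True)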