hungchiayu1 committed on
Commit 2d8fe48 · 1 Parent(s): ff8303f
Files changed (1):
  1. app-Copy1.py +53 -0
app-Copy1.py ADDED
@@ -0,0 +1,53 @@
+
+import streamlit as st
+from PIL import Image
+import torch
+from VT5 import VT5
+from transformers import (
+    T5Tokenizer,
+    T5ForConditionalGeneration,
+    CLIPVisionModelWithProjection,
+    AutoProcessor,
+)
+
+# CLIP vision encoder and its image preprocessor.
+clip = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
+processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+# Preprocessed default image (kept from the original script; not used below).
+default_img = Image.open("gfg.png")
+default_img = processor(images=default_img, return_tensors="pt").pixel_values
+
+# Flan-T5 decoder and its tokenizer.
+tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
+t5 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
+
+# Wrap CLIP and T5 in the VT5 captioning model and load fine-tuned weights on CPU.
+vt5 = VT5(t5, tokenizer, clip)
+vt5.load_state_dict(torch.load("weights.bin", map_location=torch.device("cpu")))
+
+
+def generate_caption(image):
+    """Generate a caption for the uploaded image with the VT5 model."""
+    return vt5.generate_caption(image)
+
+
+st.title("Image Captioning App")
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
+
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image.", clamp=True, use_column_width=True)
+
+    st.write("")
+    st.write("Generating caption...")
+    caption = generate_caption(image)
+    st.write("Caption: ", caption)