import streamlit as st
import torch
from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel
from PIL import Image

# Load the pretrained image-captioning model, its image processor, and tokenizer.
# nlpconnect/vit-gpt2-image-captioning is a VisionEncoderDecoder model (a ViT encoder
# paired with a GPT-2 decoder), so it is loaded with VisionEncoderDecoderModel rather
# than a sequence-classification head.
model_name = "nlpconnect/vit-gpt2-image-captioning"
image_processor = ViTImageProcessor.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name)

# Define a function to generate captions from an image
def generate_caption(image):
    # Preprocess the PIL image into the pixel tensor expected by the ViT encoder
    pixel_values = image_processor(images=image.convert("RGB"), return_tensors="pt").pixel_values
    with torch.no_grad():
        # Autoregressively generate caption token IDs with beam search
        output_ids = model.generate(pixel_values, max_length=32, num_beams=4)
    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return caption

def main():
    st.title("Image to Text Captioning")

    with st.form("my_form"):
        uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

        if uploaded_file is not None:
            # Display the uploaded image
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image", use_column_width=True)
        
        clicked = st.form_submit_button("Generate Caption")
        if clicked:
            if "image" in locals():
                caption = generate_caption(image)
                st.subheader("Generated Caption:")
                st.write(caption)

if __name__ == "__main__":
    main()
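
# A minimal way to launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py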