Shahabmoin committed on
Commit
de9f3f9
·
verified ·
1 Parent(s): ac7c0b7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import torch
4
+ from transformers import CLIPProcessor, CLIPModel
5
+ import matplotlib.pyplot as plt
6
+
7
# Load the pre-trained CLIP model and processor.
#
# Streamlit re-executes this script from the top on every user interaction,
# so the load is wrapped in ``st.cache_resource``: the weights are
# instantiated once per server process and the same objects are handed back
# on later reruns instead of being rebuilt each time.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"


@st.cache_resource
def _load_clip(checkpoint=_CLIP_CHECKPOINT):
    """Return the ``(processor, model)`` pair for the given CLIP checkpoint.

    Args:
        checkpoint: Hugging Face model identifier handed to both
            ``CLIPProcessor.from_pretrained`` and
            ``CLIPModel.from_pretrained``.

    Returns:
        A 2-tuple ``(CLIPProcessor, CLIPModel)`` loaded from *checkpoint*.
    """
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    clip_model = CLIPModel.from_pretrained(checkpoint)
    return clip_processor, clip_model


# Keep the original module-level names so the rest of the file is unaffected.
processor, model = _load_clip()
10
+
11
# Labels scored when the caller does not supply any. Each label is inserted
# into the "a photo of ..." prompt template, so prompts and display labels can
# never drift out of sync.
_DEFAULT_LABELS = ("a cat", "a dog", "a car", "a tree", "a house")


def predict_image_description(image, labels=None, top_k=3):
    """Rank candidate labels for an image by CLIP image-text similarity.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        image: Image handed to the CLIP processor as ``images=``. The caller
            in this file passes the result of ``PIL.Image.open``.
        labels: Optional iterable of label strings to score. Each one is
            turned into the prompt ``"a photo of <label>"``. Defaults to the
            five built-in labels.
        top_k: Maximum number of predictions to return. Clamped to the number
            of labels so ``torch.topk`` is never asked for more entries than
            exist. Defaults to 3.

    Returns:
        A list of strings formatted as ``"<label>: <probability>%"`` with the
        probability shown to two decimals, ordered from most to least likely.
        Only the first row of the similarity matrix is used, i.e. the result
        describes a single image.

    Raises:
        ValueError: If *labels* is given but empty.
    """
    labels = list(_DEFAULT_LABELS if labels is None else labels)
    if not labels:
        raise ValueError("labels must contain at least one entry")

    # Preprocess the image together with one text prompt per label.
    prompts = [f"a photo of {label}" for label in labels]
    inputs = processor(
        text=prompts,
        images=image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # logits_per_image is the image-text similarity score; softmax across the
    # text dimension turns each row into probabilities over the labels.
    probs = outputs.logits_per_image.softmax(dim=1)

    k = max(0, min(top_k, len(labels)))
    top_probs, top_indices = torch.topk(probs[0], k)

    # .tolist() yields plain Python ints/floats, so list indexing and string
    # formatting do not operate on 0-d tensors.
    return [
        f"{labels[index]}: {prob * 100:.2f}%"
        for index, prob in zip(top_indices.tolist(), top_probs.tolist())
    ]
35
+
36
# Streamlit UI: page header, image uploader, then the prediction list for
# whatever image the user supplied.
st.title("Real-Time Image-to-Text Generator")
st.markdown("Upload an image, and I will tell you what it is!")

# Image upload feature (the extension filter does not guarantee that the
# bytes are actually a decodable image).
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        # Image.open is lazy; load() forces the decode here so that a corrupt
        # or mislabeled file fails inside this try instead of later on.
        image = Image.open(uploaded_file)
        image.load()
    except OSError:
        # PIL's UnidentifiedImageError is an OSError subclass, so this covers
        # both unrecognized formats and truncated/corrupt data.
        st.error("Could not read the uploaded file as an image. Please try another file.")
    else:
        # Display the image
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Predict the description
        predictions = predict_image_description(image)

        # Display the predictions
        st.write("Predictions:")
        for prediction in predictions:
            st.write(prediction)