ChandraP12330 committed • Commit 5cb6bf2 • 1 Parent(s): 150c987
Update app.py

app.py CHANGED
@@ -4,6 +4,7 @@ from PIL import Image
#import tensorflow
import torch

+##BLIP
# Create the caption pipeline
initial_caption = pipeline('image-to-text', model="Salesforce/blip-image-captioning-large")

@@ -13,7 +14,49 @@ if uploaded_image is not None:
    image= Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_column_width=True)

+    initial_caption = initial_caption(image)
+    initial_caption = initial_caption[0]['generated_text']
+
+    ##CLIP
+    from transformers import CLIPProcessor, CLIPModel
+    model_id = "openai/clip-vit-large-patch14"
+    processor = CLIPProcessor.from_pretrained(model_id)
+    model = CLIPModel.from_pretrained(model_id)
+    scene_labels=['Arrest',
+    'Arson',
+    'Explosion',
+    'public fight',
+    'Normal',
+    'Road Accident',
+    'Robbery',
+    'Shooting',
+    'Stealing',
+    'Vandalism',
+    'Suspicious activity',
+    'Tailgating',
+    'Unauthorized entry',
+    'Protest/Demonstration',
+    'Drone suspicious activity',
+    'Fire/Smoke detection',
+    'Medical emergency',
+    'Suspicious package/object',
+    'Threatening',
+    'Attack',
+    'Shoplifting',
+    'burglary ',
+    'distress',
+    'assault']
+    image = Image.open(uploaded_image)
+    inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
+    outputs = model(**inputs)
+    logits_per_image = outputs.logits_per_image # this is the image-text similarity score
+    probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
+    context_raw= scene_labels[probs.argmax(-1)]
+    context= 'the image is depicting scene of '+ context_raw
+
+    ##LLM
+
# Generate the caption
if st.button("Generate Caption"):
-
-    st.write(
+
+    st.write()
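
For reference, the image-to-text pipeline used above returns a list of dictionaries rather than a plain string, which is why the added code indexes [0]['generated_text']. A minimal standalone sketch of that step (the example file name is hypothetical, not from the commit):

from PIL import Image
from transformers import pipeline

# BLIP captioning pipeline as in app.py; calling it returns a list of dicts
captioner = pipeline('image-to-text', model="Salesforce/blip-image-captioning-large")

image = Image.open("example.jpg")  # hypothetical local image
result = captioner(image)          # e.g. [{'generated_text': 'a man riding a bicycle ...'}]
caption = result[0]['generated_text']
print(caption)

Note that the commit reuses the name initial_caption for both the pipeline and its output, so the pipeline cannot be called a second time afterwards; using two names, as in this sketch, avoids that.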
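The ##LLM marker and the now-empty st.write() suggest the BLIP caption and the CLIP scene context are still to be combined and displayed. A small sketch of how that handler could look, assuming the names initial_caption, scene_labels, probs and context from the diff are in scope; the combined message wording is an assumption, not part of the commit. Indexing a Python list with a one-element tensor usually works, but .item() makes the intent explicit:

# Label lookup: .item() converts the argmax tensor to a plain Python int
context_raw = scene_labels[probs.argmax(-1).item()]
context = 'the image is depicting scene of ' + context_raw

# Hypothetical button handler combining the two model outputs
if st.button("Generate Caption"):
    st.write(initial_caption + '. ' + context)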