ChandraP12330 committed on
Commit
5cb6bf2
1 Parent(s): 150c987

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -2
app.py CHANGED
@@ -4,6 +4,7 @@ from PIL import Image
4
  #import tensorflow
5
  import torch
6
 
 
7
  # Create the caption pipeline
8
  initial_caption = pipeline('image-to-text', model="Salesforce/blip-image-captioning-large")
9
 
@@ -13,7 +14,49 @@ if uploaded_image is not None:
13
  image= Image.open(uploaded_image)
14
  st.image(image, caption="Uploaded Image", use_column_width=True)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Generate the caption
17
  if st.button("Generate Caption"):
18
- captions = initial_caption(image)
19
- st.write(captions[0]['generated_text'])
 
4
  #import tensorflow
5
  import torch
6
 
7
+ ##BLIP
8
  # Create the caption pipeline
9
  initial_caption = pipeline('image-to-text', model="Salesforce/blip-image-captioning-large")
10
 
 
14
  image= Image.open(uploaded_image)
15
  st.image(image, caption="Uploaded Image", use_column_width=True)
16
 
17
+ initial_caption = initial_caption(image)
18
+ initial_caption = initial_caption[0]['generated_text']
19
+
20
+ ##CLIP
21
+ from transformers import CLIPProcessor, CLIPModel
22
+ model_id = "openai/clip-vit-large-patch14"
23
+ processor = CLIPProcessor.from_pretrained(model_id)
24
+ model = CLIPModel.from_pretrained(model_id)
25
+ scene_labels=['Arrest',
26
+ 'Arson',
27
+ 'Explosion',
28
+ 'public fight',
29
+ 'Normal',
30
+ 'Road Accident',
31
+ 'Robbery',
32
+ 'Shooting',
33
+ 'Stealing',
34
+ 'Vandalism',
35
+ 'Suspicious activity',
36
+ 'Tailgating',
37
+ 'Unauthorized entry',
38
+ 'Protest/Demonstration',
39
+ 'Drone suspicious activity',
40
+ 'Fire/Smoke detection',
41
+ 'Medical emergency',
42
+ 'Suspicious package/object',
43
+ 'Threatening',
44
+ 'Attack',
45
+ 'Shoplifting',
46
+ 'burglary ',
47
+ 'distress',
48
+ 'assault']
49
+ image = Image.open(uploaded_image)
50
+ inputs = processor(text=scene_labels, images=image, return_tensors="pt", padding=True)
51
+ outputs = model(**inputs)
52
+ logits_per_image = outputs.logits_per_image # this is the image-text similarity score
53
+ probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
54
+ context_raw= scene_labels[probs.argmax(-1)]
55
+ context= 'the image is depicting scene of '+ context_raw
56
+
57
+ ##LLM
58
+
59
  # Generate the caption
60
  if st.button("Generate Caption"):
61
+
62
+ st.write()