mdanish committed on
Commit 17e1fb7 · verified · 1 Parent(s): f827820

Upload ./requirements.txt with huggingface_hub

Files changed (1):
  1. requirements.txt +121 -5
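The commit message says the file was pushed with huggingface_hub. For reference, a minimal sketch of such an upload via the library's upload_file API (the repo_id and repo_type below are hypothetical placeholders, not taken from this page):

    from huggingface_hub import HfApi

    api = HfApi()
    # Upload a local requirements.txt into the root of the target repo.
    # repo_id and repo_type are assumptions for illustration.
    api.upload_file(
        path_or_fileobj="./requirements.txt",
        path_in_repo="requirements.txt",
        repo_id="user/space-name",
        repo_type="space",
    )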
requirements.txt CHANGED
@@ -1,5 +1,121 @@
- streamlit
- torch
- open_clip-torch
- Pillow
- requests
+ import streamlit as st
+ from io import BytesIO  # used by process_image for URL inputs
+ from PIL import Image
+ import numpy as np
+ import requests  # used by process_image for URL inputs
+ import torch
+ from sklearn.utils.extmath import softmax
+ import open_clip
+
+ #from transformers import CLIPProcessor, CLIPModel
+
+ knnpath = '20241204-ams-no-env-open_clip_ViT-H-14-378-quickgelu.npz'
+ clip_model_name = 'ViT-H-14-378-quickgelu'
+ pretrained_name = 'dfn5b'
+
+ categories = ['walkability', 'bikeability', 'pleasantness', 'greenness', 'safety']
+
+ # Set page config
+ st.set_page_config(
+     page_title="Percept",
+     layout="wide"
+ )
+
+ debug = True
+
+ #st.write("Available models:", open_clip.list_models())
+
+ @st.cache_resource
+ def load_model():
+     """Load the OpenCLIP model and return the model, preprocess transform, and tokenizer"""
+     model, _, preprocess = open_clip.create_model_and_transforms(
+         clip_model_name, pretrained=pretrained_name
+     )
+     tokenizer = open_clip.get_tokenizer(clip_model_name)
+     return model, preprocess, tokenizer
+
+ def process_image(image, preprocess):
+     """Process an image (PIL image or URL string) and return a batched tensor"""
+     if isinstance(image, str):
+         # If image is a URL, fetch it over HTTP
+         response = requests.get(image)
+         image = Image.open(BytesIO(response.content))
+     # Ensure image is in RGB mode
+     if image.mode != 'RGB':
+         image = image.convert('RGB')
+     # Apply the CLIP preprocess transform and add a batch dimension
+     processed_image = preprocess(image).unsqueeze(0)
+     return processed_image
+
+ def knn_get_score(knn, k, cat, vec):
+     """Score vec against the stored vectors for cat using a softmax-weighted k-NN average"""
+     allvecs = knn[f'{cat}_vecs']
+     if debug: st.write('allvecs.shape', allvecs.shape)
+     scores = knn[f'{cat}_scores']
+     if debug: st.write('scores.shape', scores.shape)
+     # Compute cosine similarity of vec against allvecs
+     # (both are already normalized)
+     cos_sim_table = vec @ allvecs.T
+     if debug: st.write('cos_sim_table.shape', cos_sim_table.shape)
+     # Get sorted array indices by similarity in descending order
+     sortinds = np.flip(np.argsort(cos_sim_table, axis=1), axis=1)
+     if debug: st.write('sortinds.shape', sortinds.shape)
+     # Get the scores of the k most similar vectors
+     kscores = scores[sortinds[:, :k]]
+     if debug: st.write('kscores.shape', kscores.shape)
+     # Get the actual similarities of the k most similar vectors
+     ksims = np.take_along_axis(cos_sim_table, sortinds[:, :k], axis=1)
+     if debug: st.write('ksims.shape', ksims.shape)
+     # Turn similarities into weights: exponential scaling, then softmax normalization
+     ksims = softmax(10**ksims)
+     # Weighted sum of the k nearest neighbors' scores
+     kweightedscore = np.sum(kscores * ksims)
+     return kweightedscore
+
+
+ @st.cache_resource
+ def load_knn():
+     """Load the precomputed KNN reference vectors and scores from the .npz file"""
+     return np.load(knnpath)
+
+ def main():
+     st.title("Percept: Human Perception of Street View Image Analyzer")
+
+     try:
+         with st.spinner('Loading CLIP model... This may take a moment.'):
+             model, preprocess, tokenizer = load_model()
+             device = "cuda" if torch.cuda.is_available() else "cpu"
+             model = model.to(device)
+     except Exception as e:
+         st.error(f"Error loading model: {str(e)}")
+         st.info("Please make sure you have enough memory and the correct dependencies installed.")
+         # Stop here: the rest of the app cannot run without the model
+         return
+
+     with st.spinner('Loading KNN model... This may take a moment.'):
+         knn = load_knn()
+     if debug: st.write(knn['walkability_vecs'].shape)
+
+     file = st.file_uploader('Upload An Image')
+
+     if file:
+         try:
+             image = Image.open(file)
+
+             st.image(image, caption="Uploaded Image", width=640)
+
+             # Process image
+             with st.spinner('Processing image...'):
+                 processed_image = process_image(image, preprocess)
+                 processed_image = processed_image.to(device)
+
+                 # Encode into CLIP vector
+                 with torch.no_grad():
+                     vec = model.encode_image(processed_image)
+
+                 # Normalize vector
+                 vec /= vec.norm(dim=-1, keepdim=True)
+                 if debug: st.write(vec.shape)
+                 # Move to CPU before converting to NumPy (required when running on CUDA)
+                 vec = vec.cpu().numpy()
+                 k = 40
+                 for cat in ['walkability']:
+                     st.write(cat, 'rating =', knn_get_score(knn, k, cat, vec))
+
+         except Exception as e:
+             st.error(f"Error processing image: {str(e)}")
+
+ if __name__ == "__main__":
+     main()
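
Note that this hunk replaces the five pip requirements with the application code itself; the imports at the top indicate the Streamlit app was uploaded under the requirements.txt name. The scoring logic in knn_get_score is a softmax-weighted k-nearest-neighbor average; a minimal self-contained sketch of that technique on toy data (array names and values here are invented for illustration):

    import numpy as np
    from sklearn.utils.extmath import softmax

    # Toy stand-ins for knn['walkability_vecs'] and knn['walkability_scores']:
    # three unit vectors with human-rated scores.
    allvecs = np.array([[1.0, 0.0], [0.0, 1.0], [0.7071, 0.7071]])
    scores = np.array([3.0, 7.0, 5.0])
    vec = np.array([[0.6, 0.8]])  # normalized query vector, shape (1, 2)

    cos_sim_table = vec @ allvecs.T  # cosine similarities, shape (1, 3)
    sortinds = np.flip(np.argsort(cos_sim_table, axis=1), axis=1)
    k = 2
    kscores = scores[sortinds[:, :k]]  # scores of the k nearest neighbors
    ksims = np.take_along_axis(cos_sim_table, sortinds[:, :k], axis=1)
    weights = softmax(10**ksims)  # sharpen similarities, normalize to weights
    print(np.sum(kscores * weights))  # softmax-weighted rating

Because the similarities are exponentiated before the softmax, the nearest neighbors dominate the weighted average. Incidentally, the added code imports more packages than the five requirements that were removed; a requirements.txt matching these imports would plausibly read (names inferred from the imports; open_clip_torch is the pip name of the open_clip module):

    streamlit
    torch
    open_clip_torch
    Pillow
    requests
    numpy
    scikit-learn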