Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,11 @@ from PIL import Image
|
|
8 |
import gradio as gr
|
9 |
import openai
|
10 |
import requests
|
|
|
11 |
from tqdm import tqdm
|
12 |
from io import BytesIO
|
|
|
|
|
13 |
|
14 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
15 |
# π§ STEP 1: LOAD CLIP MODEL
|
@@ -18,7 +21,49 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
18 |
model, preprocess = clip.load("ViT-B/32", device=device)
|
19 |
|
20 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
21 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
23 |
def load_profile_data(json_file_path=None, json_data=None):
|
24 |
"""Load profile data either from a file or directly from JSON data"""
|
@@ -28,23 +73,71 @@ def load_profile_data(json_file_path=None, json_data=None):
|
|
28 |
elif json_data:
|
29 |
profiles = json_data
|
30 |
else:
|
31 |
-
#
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
"
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
return profiles
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
47 |
-
# πΌοΈ STEP
|
48 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
49 |
def download_and_process_image(url):
|
50 |
"""Download image from URL and return PIL Image"""
|
@@ -57,59 +150,106 @@ def download_and_process_image(url):
|
|
57 |
print(f"β οΈ Error downloading image from {url}: {e}")
|
58 |
return None
|
59 |
|
60 |
-
def
|
61 |
-
"""Generate CLIP embeddings for profile images"""
|
62 |
-
|
63 |
-
|
64 |
-
profile_info = [] # Store name, age, etc. for each image
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
img = download_and_process_image(photo_url)
|
79 |
-
if img is None:
|
80 |
-
continue
|
81 |
-
|
82 |
-
img_input = preprocess(img).unsqueeze(0).to(device)
|
83 |
-
with torch.no_grad():
|
84 |
-
emb = model.encode_image(img_input).cpu().numpy().flatten()
|
85 |
-
emb /= np.linalg.norm(emb)
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
if embeddings:
|
104 |
-
|
105 |
else:
|
106 |
-
|
107 |
|
108 |
-
print(f"
|
109 |
-
return
|
110 |
|
111 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
112 |
-
# β‘ STEP
|
113 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
114 |
def build_faiss_index(embeddings):
|
115 |
"""Build FAISS index from embeddings"""
|
@@ -122,7 +262,7 @@ def build_faiss_index(embeddings):
|
|
122 |
return index
|
123 |
|
124 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
125 |
-
# π STEP
|
126 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
127 |
def init_openai():
|
128 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
@@ -130,10 +270,10 @@ def init_openai():
|
|
130 |
print("β οΈ Warning: OPENAI_API_KEY not found. GPT-4 analysis will not be available.")
|
131 |
|
132 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
133 |
-
# π STEP
|
134 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
135 |
-
def search_similar_faces(user_image, index, image_urls, profile_info, top_k=
|
136 |
-
"""Search for similar faces using CLIP + FAISS"""
|
137 |
if index is None:
|
138 |
return [], [], 0, "No index available. Please load profile data first."
|
139 |
|
@@ -146,6 +286,7 @@ def search_similar_faces(user_image, index, image_urls, profile_info, top_k=5):
|
|
146 |
except Exception as e:
|
147 |
return [], [], 0, f"Image preprocessing failed: {e}"
|
148 |
|
|
|
149 |
scores, indices = index.search(query_emb, top_k)
|
150 |
scores, indices = scores.flatten(), indices.flatten()
|
151 |
|
@@ -156,6 +297,10 @@ def search_similar_faces(user_image, index, image_urls, profile_info, top_k=5):
|
|
156 |
idx = indices[i]
|
157 |
score = scores[i]
|
158 |
|
|
|
|
|
|
|
|
|
159 |
try:
|
160 |
url = image_urls[idx]
|
161 |
info = profile_info[idx]
|
@@ -171,12 +316,14 @@ def search_similar_faces(user_image, index, image_urls, profile_info, top_k=5):
|
|
171 |
except Exception as e:
|
172 |
print(f"β οΈ Error processing match at index {idx}: {e}")
|
173 |
|
174 |
-
|
|
|
|
|
175 |
|
176 |
return matching_images, match_details, risk_score
|
177 |
|
178 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
179 |
-
# π§ STEP
|
180 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
181 |
def generate_gpt4_analysis(match_details):
|
182 |
"""Generate fun analysis using GPT-4"""
|
@@ -184,13 +331,15 @@ def generate_gpt4_analysis(match_details):
|
|
184 |
return "GPT-4 analysis not available (API key not configured)"
|
185 |
|
186 |
if not match_details:
|
187 |
-
return "No matches found for analysis"
|
188 |
|
189 |
try:
|
190 |
names = [f"{d['info']['Name']} ({d['info']['Age']})" for d in match_details]
|
|
|
191 |
|
192 |
prompt = (
|
193 |
-
f"The uploaded face matches closely with: {', '.join(names)}. "
|
|
|
194 |
f"Based on this, should the user be suspicious? "
|
195 |
f"Analyze like a funny but smart AI dating detective. Keep it concise."
|
196 |
)
|
@@ -208,7 +357,7 @@ def generate_gpt4_analysis(match_details):
|
|
208 |
return f"(OpenAI error): {e}"
|
209 |
|
210 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
211 |
-
# ποΈ STEP
|
212 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
213 |
class TinderScanner:
|
214 |
def __init__(self):
|
@@ -216,49 +365,81 @@ class TinderScanner:
|
|
216 |
self.image_urls = []
|
217 |
self.profile_info = []
|
218 |
self.profiles = []
|
|
|
|
|
|
|
|
|
219 |
|
220 |
# Initialize OpenAI
|
221 |
init_openai()
|
222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
def load_data(self, json_text=None, json_file=None):
|
224 |
"""Load profile data and build index"""
|
225 |
try:
|
|
|
226 |
if json_text:
|
227 |
json_data = json.loads(json_text)
|
228 |
self.profiles = load_profile_data(json_data=json_data)
|
229 |
elif json_file:
|
230 |
self.profiles = load_profile_data(json_file_path=json_file)
|
231 |
else:
|
232 |
-
|
|
|
233 |
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
if len(embeddings) > 0:
|
237 |
self.index = build_faiss_index(embeddings)
|
238 |
-
return f"β
|
|
|
239 |
else:
|
240 |
return "β οΈ No valid images found in the provided data"
|
241 |
except Exception as e:
|
242 |
return f"β Error loading data: {e}"
|
243 |
|
244 |
-
def scan_face(self, user_image,
|
245 |
-
"""Process a user image and find matches"""
|
246 |
-
#
|
247 |
-
if json_input and not self.index:
|
248 |
-
load_result = self.load_data(json_text=json_input)
|
249 |
-
if "Successfully" not in load_result:
|
250 |
-
return [], "", "", load_result
|
251 |
-
|
252 |
if not self.index:
|
253 |
-
|
|
|
|
|
254 |
|
255 |
if user_image is None:
|
256 |
return [], "", "", "Please upload a face image"
|
257 |
|
258 |
images, match_details, risk_score = search_similar_faces(
|
259 |
-
user_image, self.index, self.image_urls, self.profile_info
|
|
|
260 |
)
|
261 |
|
|
|
|
|
|
|
262 |
# Format match captions
|
263 |
captions = []
|
264 |
for detail in match_details:
|
@@ -271,28 +452,38 @@ class TinderScanner:
|
|
271 |
return images, "\n".join(captions), f"{risk_score}/100", explanation
|
272 |
|
273 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
274 |
-
# π₯οΈ STEP
|
275 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
276 |
def create_ui():
|
277 |
scanner = TinderScanner()
|
278 |
|
279 |
-
with gr.Blocks(title="
|
280 |
-
gr.Markdown("# π Tinder Scanner Pro β Face
|
281 |
-
gr.Markdown("Scan a face image to find
|
282 |
|
283 |
with gr.Tabs():
|
284 |
with gr.TabItem("Setup Data"):
|
285 |
with gr.Row():
|
286 |
with gr.Column():
|
|
|
|
|
|
|
|
|
287 |
json_input = gr.Textbox(
|
288 |
label="JSON Profile Data",
|
289 |
placeholder='Paste JSON data here. Format: [{"Id": "...", "Name": "...", "Age": 25, "Photos": ["url1", "url2"]}]',
|
290 |
lines=10
|
291 |
)
|
292 |
-
|
|
|
293 |
data_status = gr.Textbox(label="Status")
|
294 |
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
296 |
fn=scanner.load_data,
|
297 |
inputs=[json_input],
|
298 |
outputs=[data_status]
|
@@ -302,24 +493,24 @@ def create_ui():
|
|
302 |
with gr.Row():
|
303 |
with gr.Column():
|
304 |
user_image = gr.Image(type="pil", label="Upload a Face Image")
|
305 |
-
scan_btn = gr.Button("
|
306 |
|
307 |
with gr.Column():
|
308 |
-
matches_gallery = gr.Gallery(label="π
|
309 |
match_details = gr.Textbox(label="Match Details")
|
310 |
risk_score = gr.Textbox(label="π¨ Similarity Score")
|
311 |
gpt_analysis = gr.Textbox(label="π§ GPT-4 Analysis")
|
312 |
|
313 |
scan_btn.click(
|
314 |
-
fn=scanner.scan_face,
|
315 |
-
inputs=[user_image
|
316 |
outputs=[matches_gallery, match_details, risk_score, gpt_analysis]
|
317 |
)
|
318 |
|
319 |
return demo
|
320 |
|
321 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
322 |
-
# π STEP
|
323 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
324 |
if __name__ == "__main__":
|
325 |
demo = create_ui()
|
|
|
8 |
import gradio as gr
|
9 |
import openai
|
10 |
import requests
|
11 |
+
import sqlite3
|
12 |
from tqdm import tqdm
|
13 |
from io import BytesIO
|
14 |
+
from datetime import datetime
|
15 |
+
from pathlib import Path
|
16 |
|
17 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
18 |
# π§ STEP 1: LOAD CLIP MODEL
|
|
|
21 |
model, preprocess = clip.load("ViT-B/32", device=device)
|
22 |
|
23 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
24 |
+
# 📁 STEP 2: PATH CONFIGURATION
# ─────────────────────────────────────────────
# Default paths for Hugging Face Spaces.
# HF_SPACE_PATH can be overridden via the environment; defaults to the
# current working directory.
HF_SPACE_PATH = os.getenv("HF_SPACE_PATH", ".")
# Bundled profile data used when no JSON is supplied explicitly.
DEFAULT_JSON_PATH = os.path.join(HF_SPACE_PATH, "profiles.json")
# SQLite database holding profiles, photo URLs and cached CLIP embeddings.
DEFAULT_DB_PATH = os.path.join(HF_SPACE_PATH, "tinder_profiles.db")
|
30 |
+
|
31 |
+
# βββββββββββββββββββββββββββββββββββββββββββββ
|
32 |
+
# ποΈ STEP 3: DATABASE SETUP
|
33 |
+
# βββββββββββββββββββββββββββββββββββββββββββββ
|
34 |
+
def setup_database(db_path=DEFAULT_DB_PATH):
    """Initialize the SQLite database with the required tables.

    Creates the ``profiles`` and ``photos`` tables if they do not already
    exist. ``photos.embedding`` holds a raw CLIP vector as a binary blob and
    stays NULL until the photo has been processed.

    Args:
        db_path: Filesystem path of the SQLite database file.

    Returns:
        The path of the initialized database (same as ``db_path``).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Create tables if they don't exist.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS profiles (
                id TEXT PRIMARY KEY,
                name TEXT,
                age INTEGER,
                bio TEXT,
                added_date TEXT
            )
        ''')

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS photos (
                photo_id INTEGER PRIMARY KEY AUTOINCREMENT,
                profile_id TEXT,
                url TEXT UNIQUE,
                embedding BLOB,
                FOREIGN KEY (profile_id) REFERENCES profiles(id)
            )
        ''')

        conn.commit()
    finally:
        # Close even if table creation fails so the handle is not leaked.
        conn.close()

    print(f"✅ Database initialized at {db_path}")
    return db_path
|
64 |
+
|
65 |
+
# βββββββββββββββββββββββββββββββββββββββββββββ
|
66 |
+
# π¦ STEP 4: PROFILE DATA MANAGEMENT
|
67 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
68 |
def load_profile_data(json_file_path=None, json_data=None):
|
69 |
"""Load profile data either from a file or directly from JSON data"""
|
|
|
73 |
elif json_data:
|
74 |
profiles = json_data
|
75 |
else:
|
76 |
+
# Default to profiles.json in the Hugging Face space
|
77 |
+
if os.path.exists(DEFAULT_JSON_PATH):
|
78 |
+
with open(DEFAULT_JSON_PATH, 'r') as f:
|
79 |
+
profiles = json.load(f)
|
80 |
+
else:
|
81 |
+
# Sample data structure as fallback
|
82 |
+
profiles = [
|
83 |
+
{
|
84 |
+
"Id": "sample-id",
|
85 |
+
"Name": "Sample Profile",
|
86 |
+
"Age": 25,
|
87 |
+
"Bio": "Sample bio",
|
88 |
+
"Photos": [
|
89 |
+
"https://example.com/sample.jpg"
|
90 |
+
]
|
91 |
+
}
|
92 |
+
]
|
93 |
|
94 |
return profiles
|
95 |
|
96 |
+
def store_profiles_in_db(profiles, db_path=DEFAULT_DB_PATH):
    """Store profile records and their photo URLs in the SQLite database.

    Profiles and photos that already exist (matched by profile id / photo
    URL) are left untouched; only new rows are inserted. Photo rows are
    created with a NULL embedding so generate_and_store_embeddings() can
    pick them up later.

    Args:
        profiles: Iterable of profile dicts with keys "Id", "Name", "Age",
            "Bio" and "Photos" (a list of URLs); every key is optional.
        db_path: Path of the SQLite database file.

    Returns:
        Tuple (new_profiles, new_photos) with the number of inserted rows.
    """
    import hashlib  # local import: only needed for the fallback id

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        today = datetime.now().strftime("%Y-%m-%d")
        new_profiles = 0
        new_photos = 0

        for profile in tqdm(profiles, desc="Storing profiles"):
            name = profile.get("Name", "Unknown")
            age = profile.get("Age", 0)
            bio = profile.get("Bio", "")

            # The fallback id must be stable across runs: the builtin hash()
            # is salted per process, so it would assign the same profile a
            # fresh id on every restart and re-insert it as a duplicate.
            # A digest of name+age is deterministic.
            fallback_id = hashlib.md5(
                (profile.get("Name", "") + str(profile.get("Age", 0))).encode("utf-8")
            ).hexdigest()
            profile_id = profile.get("Id", fallback_id)

            # Insert the profile only if it is not already present.
            cursor.execute("SELECT id FROM profiles WHERE id=?", (profile_id,))
            if cursor.fetchone() is None:
                cursor.execute(
                    "INSERT INTO profiles (id, name, age, bio, added_date) VALUES (?, ?, ?, ?, ?)",
                    (profile_id, name, age, bio, today)
                )
                new_profiles += 1

            # Insert each photo URL once; embedding stays NULL until computed.
            for photo_url in profile.get("Photos", []):
                cursor.execute("SELECT photo_id FROM photos WHERE url=?", (photo_url,))
                if cursor.fetchone() is None:
                    cursor.execute(
                        "INSERT INTO photos (profile_id, url, embedding) VALUES (?, ?, NULL)",
                        (profile_id, photo_url)
                    )
                    new_photos += 1

        conn.commit()
    finally:
        # Release the connection even when an insert raises.
        conn.close()

    return new_profiles, new_photos
|
138 |
+
|
139 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
140 |
+
# πΌοΈ STEP 5: IMAGE PROCESSING & EMBEDDINGS
|
141 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
142 |
def download_and_process_image(url):
|
143 |
"""Download image from URL and return PIL Image"""
|
|
|
150 |
print(f"β οΈ Error downloading image from {url}: {e}")
|
151 |
return None
|
152 |
|
153 |
+
def generate_and_store_embeddings(db_path=DEFAULT_DB_PATH, max_images=1000):
    """Generate CLIP embeddings for photos that do not have one yet.

    Fetches up to ``max_images`` photo rows whose embedding is NULL, downloads
    each image, encodes it with the module-level CLIP model and writes the
    L2-normalized vector back into the ``photos`` table as a binary blob.
    Failures on individual photos are logged and skipped (best-effort).

    Args:
        db_path: Path of the SQLite database file.
        max_images: Maximum number of pending photos to process in one call.

    Returns:
        Tuple (processed, errors): counts of successfully embedded images
        and of images that failed to download or encode.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Only photos that have never been embedded.
        cursor.execute("""
            SELECT p.photo_id, p.url, pr.id, pr.name, pr.age, pr.bio
            FROM photos p
            JOIN profiles pr ON p.profile_id = pr.id
            WHERE p.embedding IS NULL
            LIMIT ?
        """, (max_images,))

        photos = cursor.fetchall()

        processed = 0
        errors = 0

        print(f"🧠 Generating CLIP embeddings for {len(photos)} new images...")
        for photo in tqdm(photos, desc="Processing images"):
            photo_id, url, profile_id, name, age, bio = photo

            try:
                img = download_and_process_image(url)
                if img is None:
                    errors += 1
                    continue

                img_input = preprocess(img).unsqueeze(0).to(device)
                with torch.no_grad():
                    emb = model.encode_image(img_input).cpu().numpy().flatten()
                emb /= np.linalg.norm(emb)  # Normalize to unit length for cosine search

                # Store the embedding as a binary blob.
                cursor.execute(
                    "UPDATE photos SET embedding = ? WHERE photo_id = ?",
                    (emb.tobytes(), photo_id)
                )

                processed += 1

                # Commit every 10 images to avoid losing work on a crash.
                if processed % 10 == 0:
                    conn.commit()

            except Exception as e:
                # Deliberate best-effort: skip the failing photo, keep going.
                print(f"⚠️ Error with {url}: {e}")
                errors += 1

        conn.commit()
    finally:
        # Guarantee the handle is released even if the initial query fails.
        conn.close()

    print(f"✅ Finished embedding {processed} images with {errors} errors.")
    return processed, errors
|
208 |
+
|
209 |
+
def load_embeddings_from_db(db_path=DEFAULT_DB_PATH):
    """Load all stored embeddings plus their URLs and profile info.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        Tuple (embeddings_array, image_urls, profile_info) where
        ``embeddings_array`` is a float32 matrix with one row per photo
        (empty when nothing is stored), ``image_urls`` is the parallel list
        of photo URLs and ``profile_info`` the parallel list of profile
        dicts with keys "Id", "Name", "Age", "Bio".
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        cursor.execute("""
            SELECT p.embedding, p.url, pr.id, pr.name, pr.age, pr.bio
            FROM photos p
            JOIN profiles pr ON p.profile_id = pr.id
            WHERE p.embedding IS NOT NULL
        """)

        result = cursor.fetchall()
    finally:
        # Close even when the query fails so the handle is not leaked.
        conn.close()

    embeddings = []
    image_urls = []
    profile_info = []

    for row in result:
        embedding_bytes, url, profile_id, name, age, bio = row
        if embedding_bytes:  # Ensure we have embedding data
            # Blobs were written with ndarray.tobytes(); assumes float32,
            # matching what generate_and_store_embeddings() stores.
            emb = np.frombuffer(embedding_bytes, dtype=np.float32)

            embeddings.append(emb)
            image_urls.append(url)
            profile_info.append({
                "Id": profile_id,
                "Name": name,
                "Age": age,
                "Bio": bio
            })

    if embeddings:
        embeddings_array = np.vstack(embeddings).astype("float32")
    else:
        embeddings_array = np.array([]).astype("float32")

    print(f"📊 Loaded {len(embeddings_array)} embeddings from database")
    return embeddings_array, image_urls, profile_info
|
250 |
|
251 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
252 |
+
# β‘ STEP 6: BUILD FAISS INDEX
|
253 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
254 |
def build_faiss_index(embeddings):
|
255 |
"""Build FAISS index from embeddings"""
|
|
|
262 |
return index
|
263 |
|
264 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
265 |
+
# π STEP 7: OPENAI API SETUP
|
266 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
267 |
def init_openai():
|
268 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
270 |
print("β οΈ Warning: OPENAI_API_KEY not found. GPT-4 analysis will not be available.")
|
271 |
|
272 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
273 |
+
# π STEP 8: SEARCH FUNCTIONALITY
|
274 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
275 |
+
def search_similar_faces(user_image, index, image_urls, profile_info, top_k=20, min_score=0.80):
|
276 |
+
"""Search for similar faces using CLIP + FAISS with minimum score threshold"""
|
277 |
if index is None:
|
278 |
return [], [], 0, "No index available. Please load profile data first."
|
279 |
|
|
|
286 |
except Exception as e:
|
287 |
return [], [], 0, f"Image preprocessing failed: {e}"
|
288 |
|
289 |
+
# Search for more matches than we need (we'll filter by score)
|
290 |
scores, indices = index.search(query_emb, top_k)
|
291 |
scores, indices = scores.flatten(), indices.flatten()
|
292 |
|
|
|
297 |
idx = indices[i]
|
298 |
score = scores[i]
|
299 |
|
300 |
+
# Only include matches with score >= min_score (0.80)
|
301 |
+
if score < min_score:
|
302 |
+
continue
|
303 |
+
|
304 |
try:
|
305 |
url = image_urls[idx]
|
306 |
info = profile_info[idx]
|
|
|
316 |
except Exception as e:
|
317 |
print(f"β οΈ Error processing match at index {idx}: {e}")
|
318 |
|
319 |
+
# Calculate risk score based on high-quality matches only
|
320 |
+
match_scores = [d["score"] for d in match_details]
|
321 |
+
risk_score = min(100, int(np.mean(match_scores) * 100)) if match_scores else 0
|
322 |
|
323 |
return matching_images, match_details, risk_score
|
324 |
|
325 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
326 |
+
# π§ STEP 9: GPT-4 ANALYSIS
|
327 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
328 |
def generate_gpt4_analysis(match_details):
|
329 |
"""Generate fun analysis using GPT-4"""
|
|
|
331 |
return "GPT-4 analysis not available (API key not configured)"
|
332 |
|
333 |
if not match_details:
|
334 |
+
return "No high-similarity matches found for analysis"
|
335 |
|
336 |
try:
|
337 |
names = [f"{d['info']['Name']} ({d['info']['Age']})" for d in match_details]
|
338 |
+
scores = [f"{d['score']:.2f}" for d in match_details]
|
339 |
|
340 |
prompt = (
|
341 |
+
f"The uploaded face matches closely with: {', '.join(names)} with similarity scores: {', '.join(scores)}. "
|
342 |
+
f"These are very high similarity matches (0.80-1.00 range). "
|
343 |
f"Based on this, should the user be suspicious? "
|
344 |
f"Analyze like a funny but smart AI dating detective. Keep it concise."
|
345 |
)
|
|
|
357 |
return f"(OpenAI error): {e}"
|
358 |
|
359 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
360 |
+
# ποΈ STEP 10: APPLICATION CLASS
|
361 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
362 |
class TinderScanner:
|
363 |
def __init__(self):
|
|
|
365 |
self.image_urls = []
|
366 |
self.profile_info = []
|
367 |
self.profiles = []
|
368 |
+
self.db_path = None
|
369 |
+
|
370 |
+
# Setup database
|
371 |
+
self.db_path = setup_database()
|
372 |
|
373 |
# Initialize OpenAI
|
374 |
init_openai()
|
375 |
|
376 |
+
def init_from_database(self):
|
377 |
+
"""Initialize scanner from database content"""
|
378 |
+
try:
|
379 |
+
# Load embeddings from database
|
380 |
+
embeddings, self.image_urls, self.profile_info = load_embeddings_from_db(self.db_path)
|
381 |
+
|
382 |
+
if len(embeddings) > 0:
|
383 |
+
self.index = build_faiss_index(embeddings)
|
384 |
+
return f"β
Successfully loaded {len(self.image_urls)} photos from database"
|
385 |
+
else:
|
386 |
+
return "β οΈ No embeddings found in database. Upload profile data first."
|
387 |
+
except Exception as e:
|
388 |
+
return f"β Error loading from database: {e}"
|
389 |
+
|
390 |
def load_data(self, json_text=None, json_file=None):
|
391 |
"""Load profile data and build index"""
|
392 |
try:
|
393 |
+
# Load profiles from JSON
|
394 |
if json_text:
|
395 |
json_data = json.loads(json_text)
|
396 |
self.profiles = load_profile_data(json_data=json_data)
|
397 |
elif json_file:
|
398 |
self.profiles = load_profile_data(json_file_path=json_file)
|
399 |
else:
|
400 |
+
# Try to load from default location
|
401 |
+
self.profiles = load_profile_data(json_file_path=DEFAULT_JSON_PATH)
|
402 |
|
403 |
+
if not self.profiles:
|
404 |
+
return "β οΈ No profile data found"
|
405 |
+
|
406 |
+
# Store profiles in database
|
407 |
+
new_profiles, new_photos = store_profiles_in_db(self.profiles, self.db_path)
|
408 |
+
|
409 |
+
# Generate embeddings for new photos
|
410 |
+
processed, errors = generate_and_store_embeddings(self.db_path)
|
411 |
+
|
412 |
+
# Load all embeddings (including newly processed ones)
|
413 |
+
embeddings, self.image_urls, self.profile_info = load_embeddings_from_db(self.db_path)
|
414 |
|
415 |
if len(embeddings) > 0:
|
416 |
self.index = build_faiss_index(embeddings)
|
417 |
+
return (f"β
Database updated: {new_profiles} new profiles, {new_photos} new photos, "
|
418 |
+
f"{processed} photos processed. Total: {len(self.image_urls)} photos indexed.")
|
419 |
else:
|
420 |
return "β οΈ No valid images found in the provided data"
|
421 |
except Exception as e:
|
422 |
return f"β Error loading data: {e}"
|
423 |
|
424 |
+
def scan_face(self, user_image, min_score=0.80):
|
425 |
+
"""Process a user image and find matches with minimum score"""
|
426 |
+
# Try to initialize from database if not already
|
|
|
|
|
|
|
|
|
|
|
427 |
if not self.index:
|
428 |
+
init_result = self.init_from_database()
|
429 |
+
if "Successfully" not in init_result:
|
430 |
+
return [], "", "", "Please load profile data first by providing JSON input"
|
431 |
|
432 |
if user_image is None:
|
433 |
return [], "", "", "Please upload a face image"
|
434 |
|
435 |
images, match_details, risk_score = search_similar_faces(
|
436 |
+
user_image, self.index, self.image_urls, self.profile_info,
|
437 |
+
min_score=min_score
|
438 |
)
|
439 |
|
440 |
+
if not match_details:
|
441 |
+
return [], "", "0/100", "No matches with similarity score β₯ 0.80 found"
|
442 |
+
|
443 |
# Format match captions
|
444 |
captions = []
|
445 |
for detail in match_details:
|
|
|
452 |
return images, "\n".join(captions), f"{risk_score}/100", explanation
|
453 |
|
454 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
455 |
+
# π₯οΈ STEP 11: GRADIO UI
|
456 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
457 |
def create_ui():
|
458 |
scanner = TinderScanner()
|
459 |
|
460 |
+
with gr.Blocks(title="Tinder Scanner Pro") as demo:
|
461 |
+
gr.Markdown("# π Tinder Scanner Pro β High-Similarity Face Matcher")
|
462 |
+
gr.Markdown("Scan a face image to find high-similarity matches (0.80-1.00) in Tinder profiles.")
|
463 |
|
464 |
with gr.Tabs():
|
465 |
with gr.TabItem("Setup Data"):
|
466 |
with gr.Row():
|
467 |
with gr.Column():
|
468 |
+
gr.Markdown("### Load from profiles.json (auto)")
|
469 |
+
auto_load_btn = gr.Button("Load from profiles.json", variant="primary")
|
470 |
+
|
471 |
+
gr.Markdown("### OR: Paste JSON Data")
|
472 |
json_input = gr.Textbox(
|
473 |
label="JSON Profile Data",
|
474 |
placeholder='Paste JSON data here. Format: [{"Id": "...", "Name": "...", "Age": 25, "Photos": ["url1", "url2"]}]',
|
475 |
lines=10
|
476 |
)
|
477 |
+
manual_load_btn = gr.Button("Load Pasted Data", variant="secondary")
|
478 |
+
|
479 |
data_status = gr.Textbox(label="Status")
|
480 |
|
481 |
+
auto_load_btn.click(
|
482 |
+
fn=lambda: scanner.load_data(),
|
483 |
+
outputs=[data_status]
|
484 |
+
)
|
485 |
+
|
486 |
+
manual_load_btn.click(
|
487 |
fn=scanner.load_data,
|
488 |
inputs=[json_input],
|
489 |
outputs=[data_status]
|
|
|
493 |
with gr.Row():
|
494 |
with gr.Column():
|
495 |
user_image = gr.Image(type="pil", label="Upload a Face Image")
|
496 |
+
scan_btn = gr.Button("Find High-Similarity Matches (0.80+)", variant="primary")
|
497 |
|
498 |
with gr.Column():
|
499 |
+
matches_gallery = gr.Gallery(label="π High-Similarity Matches", columns=[3], height="auto")
|
500 |
match_details = gr.Textbox(label="Match Details")
|
501 |
risk_score = gr.Textbox(label="π¨ Similarity Score")
|
502 |
gpt_analysis = gr.Textbox(label="π§ GPT-4 Analysis")
|
503 |
|
504 |
scan_btn.click(
|
505 |
+
fn=lambda img: scanner.scan_face(img, min_score=0.80),
|
506 |
+
inputs=[user_image],
|
507 |
outputs=[matches_gallery, match_details, risk_score, gpt_analysis]
|
508 |
)
|
509 |
|
510 |
return demo
|
511 |
|
512 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
513 |
+
# π STEP 12: MAIN EXECUTION
|
514 |
# βββββββββββββββββββββββββββββββββββββββββββββ
|
515 |
if __name__ == "__main__":
|
516 |
demo = create_ui()
|