ans123 committed
Commit dfe5531 · verified · 1 Parent(s): 1cc356f

Update app.py

Files changed (1)
  1. app.py +181 -185
app.py CHANGED
@@ -8,10 +8,14 @@ import gradio as gr
 import openai
 from tqdm import tqdm
 from glob import glob
-import psycopg2
-from psycopg2.extras import execute_values
+import chromadb
+from chromadb.utils import embedding_functions
 import json
 import time
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
 
 # ─────────────────────────────────────────────
 # 📂 STEP 1: UNZIP TO CORRECT STRUCTURE
@@ -29,93 +33,72 @@ if not os.path.exists(unzip_dir):
 img_root = os.path.join(unzip_dir, "lfw-deepfunneled")
 
 # ─────────────────────────────────────────────
-# 🗄️ STEP 2: DATABASE SETUP
-# ─────────────────────────────────────────────
-def setup_database():
-    """Setup PostgreSQL with pgvector extension"""
-    # Database configuration
-    DB_CONFIG = {
-        "dbname": "face_matcher",
-        "user": "postgres",
-        "password": "postgres", # Change this to your actual password
-        "host": "localhost",
-        "port": "5432"
-    }
-
-    try:
-        # Connect to PostgreSQL server to create database if it doesn't exist
-        conn = psycopg2.connect(
-            dbname="postgres",
-            user=DB_CONFIG["user"],
-            password=DB_CONFIG["password"],
-            host=DB_CONFIG["host"]
-        )
-        conn.autocommit = True
-        cur = conn.cursor()
-
-        # Create database if it doesn't exist
-        cur.execute(f"SELECT 1 FROM pg_catalog.pg_database WHERE datname = '{DB_CONFIG['dbname']}'")
-        exists = cur.fetchone()
-        if not exists:
-            cur.execute(f"CREATE DATABASE {DB_CONFIG['dbname']}")
-            print(f"Database {DB_CONFIG['dbname']} created.")
-
-        cur.close()
-        conn.close()
-
-        # Connect to the face_matcher database
-        conn = psycopg2.connect(**DB_CONFIG)
-        conn.autocommit = True
-        cur = conn.cursor()
-
-        # Create pgvector extension if it doesn't exist
-        cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
-
-        # Create faces table if it doesn't exist
-        cur.execute("""
-            CREATE TABLE IF NOT EXISTS faces (
-                id SERIAL PRIMARY KEY,
-                path TEXT UNIQUE NOT NULL,
-                name TEXT NOT NULL,
-                embedding vector(512),
-                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-            )
-        """)
-
-        # Create index on the embedding column
-        cur.execute("CREATE INDEX IF NOT EXISTS faces_embedding_idx ON faces USING ivfflat (embedding vector_ip_ops)")
-
-        print("✅ Database setup complete.")
-        return conn
-    except Exception as e:
-        print(f"❌ Database setup failed: {e}")
-        return None
-
-# ─────────────────────────────────────────────
-# 🧠 STEP 3: LOAD CLIP MODEL
+# 🧠 STEP 2: LOAD CLIP MODEL
 # ─────────────────────────────────────────────
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32", device=device)
 print(f"✅ CLIP model loaded on {device}")
 
 # ─────────────────────────────────────────────
-# 📊 STEP 4: EMBEDDING FUNCTIONS
+# 🗄️ STEP 3: CHROMA DB SETUP & EMBEDDING FUNCTION
 # ─────────────────────────────────────────────
-def embed_image(image_path):
-    """Generate CLIP embedding for a single image"""
+class ClipEmbeddingFunction:
+    """Custom embedding function for Chroma DB using CLIP"""
+
+    def __init__(self, model, preprocess, device):
+        self.model = model
+        self.preprocess = preprocess
+        self.device = device
+
+    def __call__(self, images):
+        """Generate embeddings for a list of image paths"""
+        embeddings = []
+
+        for image_path in images:
+            try:
+                # Check if the path is a string (for new additions from disk)
+                if isinstance(image_path, str) and os.path.exists(image_path):
+                    img = Image.open(image_path).convert("RGB")
+                else:
+                    # For query images that are already PIL images
+                    img = image_path.convert("RGB") if hasattr(image_path, 'convert') else image_path
+
+                img_input = self.preprocess(img).unsqueeze(0).to(self.device)
+                with torch.no_grad():
+                    emb = self.model.encode_image(img_input).cpu().numpy().flatten()
+                emb /= np.linalg.norm(emb)
+                embeddings.append(emb.tolist())
+            except Exception as e:
+                print(f"⚠️ Error embedding image: {e}")
+                # Return a zero vector as fallback
+                embeddings.append([0] * 512)
+
+        return embeddings
+
+def setup_database():
+    """Setup ChromaDB with CLIP embedding function"""
     try:
-        img = Image.open(image_path).convert("RGB")
-        img_input = preprocess(img).unsqueeze(0).to(device)
-        with torch.no_grad():
-            emb = model.encode_image(img_input).cpu().numpy().flatten()
-        emb /= np.linalg.norm(emb)
-        return emb
+        # Create persistent client
+        client = chromadb.PersistentClient(path="./chroma_db")
+
+        # Create custom embedding function
+        embedding_function = ClipEmbeddingFunction(model, preprocess, device)
+
+        # Create or get existing collection
+        collection = client.get_or_create_collection(
+            name="faces",
+            embedding_function=embedding_function,
+            metadata={"hnsw:space": "cosine"} # Use cosine similarity
+        )
+
+        print("✅ ChromaDB setup complete.")
+        return client, collection
     except Exception as e:
-        print(f"⚠️ Error embedding {image_path}: {e}")
-        return None
+        print(f"❌ Database setup failed: {e}")
+        return None, None
 
-def populate_database(conn, limit=500):
-    """Populate database with images and their embeddings"""
+def populate_database(collection, limit=500):
+    """Populate ChromaDB with images and their embeddings"""
     # Collect all .jpg files inside subfolders
     all_images = sorted(glob(os.path.join(img_root, "*", "*.jpg")))
    selected_images = all_images[:limit]
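
A note on ClipEmbeddingFunction: newer chromadb releases validate custom embedding functions against the chromadb.EmbeddingFunction interface, whose __call__ must accept a single argument named input. If the plain class in this hunk trips that validation, a conforming variant would look roughly like the sketch below (hypothetical name ClipEmbeddingFn; same CLIP logic, assuming the module-level model, preprocess, and device):

    import os
    import numpy as np
    import torch
    from PIL import Image
    from chromadb import Documents, EmbeddingFunction, Embeddings

    class ClipEmbeddingFn(EmbeddingFunction):
        """CLIP embeddings with the __call__(self, input) signature Chroma checks for."""

        def __init__(self, model, preprocess, device):
            self.model = model
            self.preprocess = preprocess
            self.device = device

        def __call__(self, input: Documents) -> Embeddings:
            embeddings = []
            for item in input:
                # Accept either an on-disk path or an already-open PIL image.
                img = Image.open(item) if isinstance(item, str) and os.path.exists(item) else item
                tensor = self.preprocess(img.convert("RGB")).unsqueeze(0).to(self.device)
                with torch.no_grad():
                    emb = self.model.encode_image(tensor).cpu().numpy().flatten()
                embeddings.append((emb / np.linalg.norm(emb)).tolist())
            return embeddings
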
@@ -123,126 +106,141 @@ def populate_database(conn, limit=500):
     if len(selected_images) == 0:
         raise RuntimeError("❌ No image files found in unzipped structure!")
 
-    cur = conn.cursor()
-
-    # Check which images are already in the database
-    cur.execute("SELECT path FROM faces")
-    existing_paths = set(path[0] for path in cur.fetchall())
+    # Get existing IDs
+    existing_ids = set()
+    try:
+        existing_count = collection.count()
+        if existing_count > 0:
+            results = collection.get(limit=existing_count)
+            existing_ids = set(results['ids'])
+    except Exception as e:
+        print(f"Error getting existing IDs: {e}")
 
     # Filter out images that are already in the database
-    new_images = [path for path in selected_images if path not in existing_paths]
+    new_images = []
+    new_ids = []
+    new_metadatas = []
+
+    for fpath in selected_images:
+        # Create ID from path
+        image_id = fpath.replace('/', '_')
+        if image_id not in existing_ids:
+            new_images.append(fpath)
+            new_ids.append(image_id)
+            name = os.path.splitext(os.path.basename(fpath))[0].replace("_", " ")
+            new_metadatas.append({
+                "path": fpath,
+                "name": name
+            })
 
     if not new_images:
         print("✅ All images are already in the database.")
         return
 
-    print(f"🧠 Generating CLIP embeddings for {len(new_images)} new images...")
+    print(f"🧠 Adding {len(new_images)} new images to the database...")
 
     # Process images in batches to avoid memory issues
     batch_size = 50
     for i in range(0, len(new_images), batch_size):
-        batch = new_images[i:i+batch_size]
-        data_to_insert = []
+        batch_imgs = new_images[i:i+batch_size]
+        batch_ids = new_ids[i:i+batch_size]
+        batch_metadatas = new_metadatas[i:i+batch_size]
 
-        for fpath in tqdm(batch, desc=f"Embedding batch {i//batch_size + 1}"):
-            try:
-                emb = embed_image(fpath)
-                if emb is not None:
-                    name = os.path.splitext(os.path.basename(fpath))[0].replace("_", " ")
-                    data_to_insert.append((fpath, name, emb.tolist()))
-            except Exception as e:
-                print(f"⚠️ Error with {fpath}: {e}")
+        print(f"Processing batch {i//batch_size + 1}/{(len(new_images)-1)//batch_size + 1}...")
 
-        # Insert batch into database
-        if data_to_insert:
-            execute_values(
-                cur,
-                "INSERT INTO faces (path, name, embedding) VALUES %s ON CONFLICT (path) DO NOTHING",
-                [(d[0], d[1], d[2]) for d in data_to_insert],
-                template="(%s, %s, %s::vector)"
+        try:
+            collection.add(
+                documents=batch_imgs, # ChromaDB will call our embedding function on these
+                ids=batch_ids,
+                metadatas=batch_metadatas
             )
-            conn.commit()
+        except Exception as e:
+            print(f"⚠️ Error adding batch to database: {e}")
 
     # Count total faces in database
-    cur.execute("SELECT COUNT(*) FROM faces")
-    total_faces = cur.fetchone()[0]
+    total_faces = collection.count()
     print(f"✅ Database now contains {total_faces} faces.")
 
 # ─────────────────────────────────────────────
-# 🔐 STEP 5: LOAD OPENAI API KEY
+# 🔐 STEP 4: LOAD OPENAI API KEY
 # ─────────────────────────────────────────────
 openai.api_key = os.getenv("OPENAI_API_KEY")
+if not openai.api_key:
+    print("⚠️ OpenAI API key not found. GPT-4 analysis will not work.")
 
 # ─────────────────────────────────────────────
-# 🔍 STEP 6: FACE MATCHING FUNCTION
+# 🔍 STEP 5: FACE MATCHING FUNCTION
 # ─────────────────────────────────────────────
-def scan_face(user_image, conn):
+def scan_face(user_image, collection):
     """Scan a face image and find matches in the database"""
     if user_image is None:
         return [], "", "", "Please upload a face image."
 
     try:
-        user_image = user_image.convert("RGB")
-        tensor = preprocess(user_image).unsqueeze(0).to(device)
-        with torch.no_grad():
-            query_emb = model.encode_image(tensor).cpu().numpy().flatten()
-        query_emb /= np.linalg.norm(query_emb)
-    except Exception as e:
-        return [], "", "", f"Image preprocessing failed: {e}"
+        # Query database for similar faces using the image directly
+        results = collection.query(
+            query_embeddings=None, # Will be generated by our embedding function
+            query_images=[user_image], # Pass the PIL image directly
+            n_results=5,
+            include=["metadatas", "distances"]
+        )
+
+        metadatas = results.get("metadatas", [[]])[0]
+        distances = results.get("distances", [[]])[0]
+
+        gallery, captions, names = [], [], []
+        scores = []
+
+        for i, metadata in enumerate(metadatas):
+            try:
+                path = metadata["path"]
+                name = metadata["name"]
+
+                # Convert distance to similarity score (1 - normalized_distance)
+                # ChromaDB uses cosine distance, so 0 is most similar, 2 is most different
+                distance = distances[i]
+                similarity = 1 - (distance / 2) # Convert to 0-1 scale
+                scores.append(similarity)
+
+                img = Image.open(path)
+                gallery.append(img)
+                captions.append(f"{name} (Score: {similarity:.2f})")
+                names.append(name)
+            except Exception as e:
+                captions.append(f"⚠️ Error loading match image: {e}")
+
+        risk_score = min(100, int(np.mean(scores) * 100)) if scores else 0
 
-    # Query database for similar faces
-    cur = conn.cursor()
-    emb_list = query_emb.tolist()
-    cur.execute("""
-        SELECT path, name, embedding <-> %s::vector AS distance
-        FROM faces
-        ORDER BY distance
-        LIMIT 5
-    """, (emb_list,))
-
-    results = cur.fetchall()
-
-    gallery, captions, names = [], [], []
-    scores = []
-
-    for path, name, distance in results:
-        try:
-            # Convert distance to similarity score (1 - distance)
-            similarity = 1 - distance
-            scores.append(similarity)
-
-            img = Image.open(path)
-            gallery.append(img)
-            captions.append(f"{name} (Score: {similarity:.2f})")
-            names.append(name)
-        except Exception as e:
-            captions.append(f"⚠️ Error loading match image: {e}")
-
-    risk_score = min(100, int(np.mean(scores) * 100)) if scores else 0
+        # 🧠 GPT-4 EXPLANATION
+        explanation = ""
+        if openai.api_key and names:
+            try:
+                prompt = (
+                    f"The uploaded face matches closely with: {', '.join(names)}. "
+                    f"Based on this, should the user be suspicious? Analyze like a funny but smart AI dating detective."
+                )
+                response = openai.chat.completions.create(
+                    model="gpt-4",
+                    messages=[
+                        {"role": "system", "content": "You're a playful but intelligent AI face-matching analyst."},
+                        {"role": "user", "content": prompt}
+                    ]
+                )
+                explanation = response.choices[0].message.content
+            except Exception as e:
+                explanation = f"(OpenAI error): {e}"
+        else:
+            explanation = "OpenAI API key not set or no matches found."
 
-    # 🧠 GPT-4 EXPLANATION
-    try:
-        prompt = (
-            f"The uploaded face matches closely with: {', '.join(names)}. "
-            f"Based on this, should the user be suspicious? Analyze like a funny but smart AI dating detective."
-        )
-        response = openai.chat.completions.create(
-            model="gpt-4",
-            messages=[
-                {"role": "system", "content": "You're a playful but intelligent AI face-matching analyst."},
-                {"role": "user", "content": prompt}
-            ]
-        )
-        explanation = response.choices[0].message.content
+        return gallery, "\n".join(captions), f"{risk_score}/100", explanation
+
     except Exception as e:
-        explanation = f"(OpenAI error): {e}"
-
-    return gallery, "\n".join(captions), f"{risk_score}/100", explanation
+        return [], "", "", f"Error scanning face: {e}"
 
 # ─────────────────────────────────────────────
-# 🌱 STEP 7: ADD NEW FACE FUNCTION
+# 🌱 STEP 6: ADD NEW FACE FUNCTION
 # ─────────────────────────────────────────────
-def add_new_face(image, name, conn):
+def add_new_face(image, name, collection):
     """Add a new face to the database"""
     if image is None or not name:
         return "Please provide both an image and a name."
@@ -254,46 +252,44 @@ def add_new_face(image, name, conn):
         path = f"uploaded_faces/{name.replace(' ', '_')}_{timestamp}.jpg"
         image.save(path)
 
-        # Generate embedding
-        emb = embed_image(path)
-        if emb is None:
-            return "Failed to generate embedding for the image."
-
-        # Add to database
-        cur = conn.cursor()
-        cur.execute(
-            "INSERT INTO faces (path, name, embedding) VALUES (%s, %s, %s::vector)",
-            (path, name, emb.tolist())
+        # Add to ChromaDB
+        image_id = path.replace('/', '_')
+        collection.add(
+            documents=[path],
+            ids=[image_id],
+            metadatas=[{
+                "path": path,
+                "name": name
+            }]
         )
-        conn.commit()
 
         return f"✅ Added {name} to the database successfully!"
     except Exception as e:
         return f"❌ Failed to add face: {e}"
 
 # ─────────────────────────────────────────────
-# 🎛️ STEP 8: GRADIO UI
+# 🎛️ STEP 7: GRADIO UI
 # ─────────────────────────────────────────────
 def create_ui():
     """Create Gradio UI with both scan and add functionality"""
-    # Setup database connection
-    conn = setup_database()
-    if conn is None:
-        raise RuntimeError("❌ Database connection failed. Please check your PostgreSQL installation and pgvector extension.")
+    # Setup database
+    client, collection = setup_database()
+    if collection is None:
+        raise RuntimeError("❌ Database setup failed.")
 
     # Populate database with initial images
-    populate_database(conn)
+    populate_database(collection)
 
-    # Wrapper functions for Gradio that use the database connection
+    # Wrapper functions for Gradio that use the database collection
     def scan_face_wrapper(image):
-        return scan_face(image, conn)
+        return scan_face(image, collection)
 
     def add_face_wrapper(image, name):
-        return add_new_face(image, name, conn)
+        return add_new_face(image, name, collection)
 
     with gr.Blocks(title="Tinder Scanner – Real Face Match Detector") as demo:
         gr.Markdown("# Tinder Scanner – Real Face Match Detector")
-        gr.Markdown("Scan a face image to find visual matches using CLIP and PostgreSQL, and get a cheeky GPT-4 analysis.")
+        gr.Markdown("Scan a face image to find visual matches using CLIP and ChromaDB, and get a cheeky GPT-4 analysis.")
 
         with gr.Tab("Scan Face"):
             with gr.Row():
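
For reviewing the migrated store outside Gradio, a quick inspection snippet (hypothetical, assuming the ./chroma_db path and "faces" collection name used above):

    import chromadb

    client = chromadb.PersistentClient(path="./chroma_db")
    collection = client.get_collection("faces")  # raises if setup never ran
    print(collection.count(), "faces indexed")
    print(collection.peek(3)["metadatas"])  # sample a few stored records
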
 
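Dependency footprint after this commit, judging from the imports alone (a sketch for the Space's requirements.txt; version pins omitted, and the CLIP line assumes OpenAI's reference repo):

    gradio
    openai
    torch
    tqdm
    numpy
    Pillow
    chromadb
    python-dotenv
    git+https://github.com/openai/CLIP.git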