David Driscoll
committed on
Commit · f3de933
1 Parent(s): 134b727
Emotion fix
app.py CHANGED
@@ -2,242 +2,269 @@ import gradio as gr
 import cv2
 import numpy as np
 import torch
+from torchvision import models, transforms
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
 from PIL import Image
 import mediapipe as mp

-
-from transformers import (
-    AutoModel,
-    AutoImageProcessor,
-    AutoModelForImageClassification,
-    AutoModelForSemanticSegmentation
-)
+# Hugging Face imports for emotion detection
+from transformers import AutoImageProcessor, AutoModelForImageClassification

 # -----------------------------
-# Configuration
+# Configuration
 # -----------------------------
+SKIP_RATE = 1  # For image processing, always run the analysis
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 DESIRED_SIZE = (640, 480)

 # -----------------------------
-#
+# Global caches for overlay info and frame counters
 # -----------------------------
-
-
+posture_cache = {"landmarks": None, "text": "Initializing...", "counter": 0}
+emotion_cache = {"text": "Initializing...", "counter": 0}
+objects_cache = {"boxes": None, "text": "Initializing...", "object_list_text": "", "counter": 0}
+faces_cache = {"boxes": None, "text": "Initializing...", "counter": 0}

 # -----------------------------
-#
+# Initialize Models and Helpers
 # -----------------------------
+mp_pose = mp.solutions.pose
+pose = mp_pose.Pose()
+mp_drawing = mp.solutions.drawing_utils

-
-
-facial_recognition_model = AutoModel.from_pretrained("facebook/dino-vitb16")
-facial_recognition_model.to(device)
-facial_recognition_model.eval()
+mp_face_detection = mp.solutions.face_detection
+face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

-
-
-
-
-
+object_detection_model = models.detection.fasterrcnn_resnet50_fpn(
+    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+)
+object_detection_model.eval().to(device)
+obj_transform = transforms.Compose([transforms.ToTensor()])

-#
-emotion_processor = AutoImageProcessor.from_pretrained("nateraw/facial-expression-recognition")
-emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/facial-expression-recognition")
+# Initialize the Hugging Face emotion detection model.
+# (Using the public "nateraw/fer" repo to mimic expression recognition.)
+emotion_processor = AutoImageProcessor.from_pretrained("nateraw/fer")
+emotion_model = AutoModelForImageClassification.from_pretrained("nateraw/fer")
 emotion_model.to(device)
 emotion_model.eval()

-#
-age_gender_processor = AutoImageProcessor.from_pretrained("oayu/age-gender-estimation")
-age_gender_model = AutoModelForImageClassification.from_pretrained("oayu/age-gender-estimation")
-age_gender_model.to(device)
-age_gender_model.eval()
+# Retrieve object categories from model weights metadata
+object_categories = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.meta["categories"]

-#
-face_parsing_processor = AutoImageProcessor.from_pretrained("hila-chefer/face-parsing")
-face_parsing_model = AutoModelForSemanticSegmentation.from_pretrained("hila-chefer/face-parsing")
-face_parsing_model.to(device)
-face_parsing_model.eval()
+# -----------------------------
+# Overlay Drawing Functions
+# -----------------------------
+def draw_posture_overlay(raw_frame, landmarks):
+    # Draw connector lines using MediaPipe's POSE_CONNECTIONS
+    for connection in mp_pose.POSE_CONNECTIONS:
+        start_idx, end_idx = connection
+        if start_idx < len(landmarks) and end_idx < len(landmarks):
+            start_point = landmarks[start_idx]
+            end_point = landmarks[end_idx]
+            cv2.line(raw_frame, start_point, end_point, (50, 205, 50), 2)
+    # Draw landmark points in lime green (BGR: (50,205,50))
+    for (x, y) in landmarks:
+        cv2.circle(raw_frame, (x, y), 4, (50, 205, 50), -1)
+    return raw_frame

-
-
-
-
-deepfake_model.eval()
+def draw_boxes_overlay(raw_frame, boxes, color):
+    for (x1, y1, x2, y2) in boxes:
+        cv2.rectangle(raw_frame, (x1, y1), (x2, y2), color, 2)
+    return raw_frame

 # -----------------------------
-#
+# Heavy (Synchronous) Detection Functions
 # -----------------------------
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        h, w, _ = frame_rgb.shape
-        x = int(bbox.xmin * w)
-        y = int(bbox.ymin * h)
-        box_w = int(bbox.width * w)
-        box_h = int(bbox.height * h)
-        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
-        face_image = Image.fromarray(face_crop)
-
-        inputs = facial_recognition_extractor(face_image, return_tensors="pt").to(device)
-        with torch.no_grad():
-            outputs = facial_recognition_model(**inputs)
-        # Use mean pooling over the last hidden state to get an embedding vector
-        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
-
-        # Compare against dummy database using cosine similarity
-        best_score = -1
-        best_name = "Unknown"
-        for name, db_emb in dummy_database.items():
-            cos_sim = torch.nn.functional.cosine_similarity(embeddings, db_emb, dim=0)
-            if cos_sim > best_score:
-                best_score = cos_sim
-                best_name = name
-        threshold = 0.7  # dummy threshold for identification
-        if best_score > threshold:
-            result = f"Identified as {best_name} (sim: {best_score:.2f})"
-        else:
-            result = f"No match found (best: {best_name}, sim: {best_score:.2f})"
-        return face_crop, result
+def compute_posture_overlay(image):
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    h, w, _ = frame_bgr.shape
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    small_h, small_w, _ = frame_bgr_small.shape
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+    pose_results = pose.process(frame_rgb_small)
+    if pose_results.pose_landmarks:
+        landmarks = []
+        for lm in pose_results.pose_landmarks.landmark:
+            # Scale landmarks back to the original image size
+            x = int(lm.x * small_w * (w / small_w))
+            y = int(lm.y * small_h * (h / small_h))
+            landmarks.append((x, y))
+        text = "Posture detected"
     else:
-        return frame, "No face detected"
+        landmarks = []
+        text = "No posture detected"
+    return landmarks, text

-def
+def compute_emotion_overlay(image):
     """
-
+    This function mimics the original FER-based expression recognition,
+    but uses a Hugging Face emotion model instead.
     """
-
-
-
-    frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)

-    face_results = face_detection.process(frame_rgb)
+    # Use MediaPipe to detect a face and crop it
+    face_results = face_detection.process(frame_rgb_small)
     if face_results.detections:
         detection = face_results.detections[0]
         bbox = detection.location_data.relative_bounding_box
-        h, w, _ = frame_rgb.shape
+        h, w, _ = frame_rgb_small.shape
         x = int(bbox.xmin * w)
         y = int(bbox.ymin * h)
         box_w = int(bbox.width * w)
         box_h = int(bbox.height * h)
-        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
+        face_crop = frame_rgb_small[y:y+box_h, x:x+box_w]
         face_image = Image.fromarray(face_crop)

+        # Process face crop with the Hugging Face emotion model
         inputs = emotion_processor(face_image, return_tensors="pt").to(device)
         with torch.no_grad():
             outputs = emotion_model(**inputs)
         logits = outputs.logits
-
-
-
+        probs = torch.softmax(logits, dim=-1)
+        score, pred = torch.max(probs, dim=-1)
+        label = emotion_model.config.id2label[pred.item()]
+        text = f"{label} ({score.item():.2f})"
     else:
-        return frame, "No face detected"
+        text = "No face detected"
+    return text

-def
-
-
-
-
-
-    frame_resized = cv2.resize(frame_bgr, DESIRED_SIZE)
-    frame_rgb = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB)
-
-    face_results = face_detection.process(frame_rgb)
-    if face_results.detections:
-        detection = face_results.detections[0]
-        bbox = detection.location_data.relative_bounding_box
-        h, w, _ = frame_rgb.shape
-        x = int(bbox.xmin * w)
-        y = int(bbox.ymin * h)
-        box_w = int(bbox.width * w)
-        box_h = int(bbox.height * h)
-        face_crop = frame_rgb[y:y+box_h, x:x+box_w]
-        face_image = Image.fromarray(face_crop)
-
-        inputs = age_gender_processor(face_image, return_tensors="pt").to(device)
-        with torch.no_grad():
-            outputs = age_gender_model(**inputs)
-        logits = outputs.logits
-        pred = logits.argmax(-1).item()
-        label = age_gender_model.config.id2label[pred]
-        return face_crop, f"Age & Gender: {label}"
-    else:
-        return frame, "No face detected"
-
-def compute_face_parsing(image):
-    """
-    Runs face parsing (segmentation) on the provided image.
-    """
-    image_pil = Image.fromarray(np.array(image))
-    inputs = face_parsing_processor(image_pil, return_tensors="pt").to(device)
+def compute_objects_overlay(image):
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+    image_pil = Image.fromarray(frame_rgb_small)
+    img_tensor = obj_transform(image_pil).to(device)
     with torch.no_grad():
-
-
-
-
-
-
-
+        detections = object_detection_model([img_tensor])[0]
+    threshold = 0.8
+    boxes = []
+    object_list = []
+    for box, score, label in zip(detections["boxes"], detections["scores"], detections["labels"]):
+        if score > threshold:
+            boxes.append(tuple(box.int().cpu().numpy()))
+            label_idx = int(label)
+            label_name = object_categories[label_idx] if label_idx < len(object_categories) else "Unknown"
+            object_list.append(f"{label_name} ({score:.2f})")
+    text = f"Detected {len(boxes)} object(s)" if boxes else "No objects detected"
+    object_list_text = " | ".join(object_list) if object_list else "None"
+    return boxes, text, object_list_text

-def
-
-
-
-
-
-
-
-
-
-
+def compute_faces_overlay(image):
+    frame_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    h, w, _ = frame_bgr.shape
+    frame_bgr_small = cv2.resize(frame_bgr, DESIRED_SIZE)
+    small_h, small_w, _ = frame_bgr_small.shape
+    frame_rgb_small = cv2.cvtColor(frame_bgr_small, cv2.COLOR_BGR2RGB)
+    face_results = face_detection.process(frame_rgb_small)
+    boxes = []
+    if face_results.detections:
+        for detection in face_results.detections:
+            bbox = detection.location_data.relative_bounding_box
+            x = int(bbox.xmin * small_w)
+            y = int(bbox.ymin * small_h)
+            box_w = int(bbox.width * small_w)
+            box_h = int(bbox.height * small_h)
+            boxes.append((x, y, x + box_w, y + box_h))
+        text = f"Detected {len(boxes)} face(s)"
+    else:
+        text = "No faces detected"
+    return boxes, text

 # -----------------------------
-# Analysis Functions
+# Main Analysis Functions for Single Image
 # -----------------------------

-def
-
-
-
-
-
-
+def analyze_posture_current(image):
+    global posture_cache
+    posture_cache["counter"] += 1
+    current_frame = np.array(image)
+    if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
+        landmarks, text = compute_posture_overlay(image)
+        posture_cache["landmarks"] = landmarks
+        posture_cache["text"] = text
+    output = current_frame.copy()
+    if posture_cache["landmarks"]:
+        output = draw_posture_overlay(output, posture_cache["landmarks"])
+    return output, f"<div style='color: lime !important;'>Posture Analysis: {posture_cache['text']}</div>"

-def
-
-
+def analyze_emotion_current(image):
+    global emotion_cache
+    emotion_cache["counter"] += 1
+    current_frame = np.array(image)
+    if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
+        text = compute_emotion_overlay(image)
+        emotion_cache["text"] = text
+    return current_frame, f"<div style='color: lime !important;'>Emotion Analysis: {emotion_cache['text']}</div>"

-def
-
-
+def analyze_objects_current(image):
+    global objects_cache
+    objects_cache["counter"] += 1
+    current_frame = np.array(image)
+    if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
+        boxes, text, object_list_text = compute_objects_overlay(image)
+        objects_cache["boxes"] = boxes
+        objects_cache["text"] = text
+        objects_cache["object_list_text"] = object_list_text
+    output = current_frame.copy()
+    if objects_cache["boxes"]:
+        output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
+    combined_text = f"Object Detection: {objects_cache['text']}<br>Details: {objects_cache['object_list_text']}"
+    return output, f"<div style='color: lime !important;'>{combined_text}</div>"

-def
-
-
+def analyze_faces_current(image):
+    global faces_cache
+    faces_cache["counter"] += 1
+    current_frame = np.array(image)
+    if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
+        boxes, text = compute_faces_overlay(image)
+        faces_cache["boxes"] = boxes
+        faces_cache["text"] = text
+    output = current_frame.copy()
+    if faces_cache["boxes"]:
+        output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
+    return output, f"<div style='color: lime !important;'>Face Detection: {faces_cache['text']}</div>"
+
+def analyze_all(image):
+    current_frame = np.array(image).copy()
+    # Posture Analysis
+    landmarks, posture_text = compute_posture_overlay(image)
+    if landmarks:
+        current_frame = draw_posture_overlay(current_frame, landmarks)
+    # Emotion Analysis
+    emotion_text = compute_emotion_overlay(image)
+    # Object Detection
+    boxes_obj, objects_text, object_list_text = compute_objects_overlay(image)
+    if boxes_obj:
+        current_frame = draw_boxes_overlay(current_frame, boxes_obj, (255, 255, 0))
+    # Face Detection
+    boxes_face, faces_text = compute_faces_overlay(image)
+    if boxes_face:
+        current_frame = draw_boxes_overlay(current_frame, boxes_face, (0, 0, 255))
+    # Combined Analysis Text
+    combined_text = (
+        f"<b>Posture Analysis:</b> {posture_text}<br>"
+        f"<b>Emotion Analysis:</b> {emotion_text}<br>"
+        f"<b>Object Detection:</b> {objects_text}<br>"
+        f"<b>Detected Objects:</b> {object_list_text}<br>"
+        f"<b>Face Detection:</b> {faces_text}"
+    )
+    if object_list_text and object_list_text != "None":
+        description_text = f"Image Description: The scene features {object_list_text}."
+    else:
+        description_text = "Image Description: No prominent objects detected."
+    combined_text += f"<br><br><div style='border:1px solid lime; padding:10px; box-shadow: 0 0 10px lime;'><b>{description_text}</b></div>"
+    combined_text_html = f"<div style='color: lime !important;'>{combined_text}</div>"
+    return current_frame, combined_text_html

 # -----------------------------
-# Custom CSS (
+# Custom CSS (High-Tech Neon Theme)
 # -----------------------------
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap');
 body {
     background-color: #0e0e0e;
     font-family: 'Orbitron', sans-serif;
-    margin: 0;
-    padding: 0;
     color: #32CD32;
 }
 .gradio-container {
@@ -261,85 +288,65 @@ input, button, .output {
 """

 # -----------------------------
-# Create
+# Create Individual Interfaces for Image Processing
 # -----------------------------
-
-    fn=
-    inputs=gr.Image(label="Upload
-    outputs=[gr.Image(type="numpy", label="
-
-
-    description="Extracts facial embeddings using facebook/dino-vitb16 and identifies the face by comparing against a dummy database.",
+posture_interface = gr.Interface(
+    fn=analyze_posture_current,
+    inputs=gr.Image(label="Upload an Image for Posture Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Posture Analysis")],
+    title="Posture",
+    description="Detects your posture using MediaPipe with connector lines.",
     live=False
 )

 emotion_interface = gr.Interface(
-    fn=
-    inputs=gr.Image(label="Upload
-    outputs=[gr.Image(type="numpy", label="
-
-
-    description="Classifies the facial expression using nateraw/facial-expression-recognition.",
+    fn=analyze_emotion_current,
+    inputs=gr.Image(label="Upload an Image for Emotion Analysis"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Emotion Analysis")],
+    title="Emotion",
+    description="Detects facial emotions using a Hugging Face model.",
     live=False
 )

-age_gender_interface = gr.Interface(
-    fn=
-    inputs=gr.Image(label="Upload
-    outputs=[gr.Image(type="numpy", label="
-
-
-    description="Predicts age and gender from the face using oayu/age-gender-estimation.",
+objects_interface = gr.Interface(
+    fn=analyze_objects_current,
+    inputs=gr.Image(label="Upload an Image for Object Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Object Detection")],
+    title="Objects",
+    description="Detects objects using a pretrained Faster R-CNN.",
     live=False
 )

-face_parsing_interface = gr.Interface(
-    fn=
-    inputs=gr.Image(label="Upload
-    outputs=[gr.Image(type="numpy", label="
-
-
-    description="Segments face regions (eyes, nose, lips, hair, etc.) using hila-chefer/face-parsing.",
+faces_interface = gr.Interface(
+    fn=analyze_faces_current,
+    inputs=gr.Image(label="Upload an Image for Face Detection"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Face Detection")],
+    title="Faces",
+    description="Detects faces using MediaPipe.",
    live=False
 )

-deepfake_interface = gr.Interface(
-    fn=
-    inputs=gr.Image(label="Upload an Image for
-    outputs=[gr.Image(type="numpy", label="
-
-
-    description="Detects manipulated or deepfake images using microsoft/FaceForensics.",
+all_interface = gr.Interface(
+    fn=analyze_all,
+    inputs=gr.Image(label="Upload an Image for All Inferences"),
+    outputs=[gr.Image(type="numpy", label="Annotated Output"), gr.HTML(label="Combined Analysis")],
+    title="All Inferences",
+    description="Runs posture, emotion, object, and face detection all at once.",
     live=False
 )

-# -----------------------------
-# Create a Tabbed Interface
-# -----------------------------
 tabbed_interface = gr.TabbedInterface(
-    interface_list=[
-
-        emotion_interface,
-        age_gender_interface,
-        face_parsing_interface,
-        deepfake_interface
-    ],
-    tab_names=[
-        "Facial Recognition",
-        "Emotion Detection",
-        "Age & Gender",
-        "Face Parsing",
-        "Deepfake Detection"
-    ]
+    interface_list=[posture_interface, emotion_interface, objects_interface, faces_interface, all_interface],
+    tab_names=["Posture", "Emotion", "Objects", "Faces", "All Inferences"]
 )

 # -----------------------------
-# Wrap in a Blocks Layout
+# Wrap in a Blocks Layout and Launch
 # -----------------------------
 demo = gr.Blocks(css=custom_css)
 with demo:
-    gr.Markdown("<h1 class='gradio-title' style='color: #32CD32;'>Multi-Analysis
-    gr.Markdown("<p class='gradio-description' style='color: #32CD32;'>Upload an image to run
+    gr.Markdown("<h1 class='gradio-title' style='color: #32CD32;'>Multi-Analysis Image App</h1>")
+    gr.Markdown("<p class='gradio-description' style='color: #32CD32;'>Upload an image to run high-tech analysis for posture, emotions, objects, and faces.</p>")
     tabbed_interface.render()

 if __name__ == "__main__":
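The emotion path this commit swaps in can be exercised on its own. Below is a minimal sketch assuming only the "nateraw/fer" checkpoint named in the diff; "face.jpg" is a placeholder for any image that already contains a face crop.

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained("nateraw/fer")
model = AutoModelForImageClassification.from_pretrained("nateraw/fer").to(device).eval()

face_image = Image.open("face.jpg").convert("RGB")  # placeholder input
inputs = processor(face_image, return_tensors="pt").to(device)
with torch.no_grad():
    logits = model(**inputs).logits
# Same scoring as compute_emotion_overlay: softmax, then top-1 label and score
probs = torch.softmax(logits, dim=-1)
score, pred = torch.max(probs, dim=-1)
print(f"{model.config.id2label[pred.item()]} ({score.item():.2f})")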
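compute_objects_overlay leans on torchvision's weights-enum API, where the COCO category names travel with the pretrained weights. A standalone sketch of that path, assuming torchvision >= 0.13 ("scene.jpg" is a placeholder image path):

import torch
from PIL import Image
from torchvision import models, transforms
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = models.detection.fasterrcnn_resnet50_fpn(weights=weights).eval()
categories = weights.meta["categories"]  # COCO labels bundled with the weights

img_tensor = transforms.ToTensor()(Image.open("scene.jpg").convert("RGB"))
with torch.no_grad():
    det = model([img_tensor])[0]  # dict with "boxes", "labels", "scores"
for box, score, label in zip(det["boxes"], det["scores"], det["labels"]):
    if score > 0.8:  # same confidence threshold the app uses
        print(categories[int(label)], f"{score:.2f}", box.int().tolist())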
174 |
|
175 |
# -----------------------------
|
176 |
+
# Main Analysis Functions for Single Image
|
177 |
# -----------------------------
|
178 |
+
def analyze_posture_current(image):
|
179 |
+
global posture_cache
|
180 |
+
posture_cache["counter"] += 1
|
181 |
+
current_frame = np.array(image)
|
182 |
+
if posture_cache["counter"] % SKIP_RATE == 0 or posture_cache["landmarks"] is None:
|
183 |
+
landmarks, text = compute_posture_overlay(image)
|
184 |
+
posture_cache["landmarks"] = landmarks
|
185 |
+
posture_cache["text"] = text
|
186 |
+
output = current_frame.copy()
|
187 |
+
if posture_cache["landmarks"]:
|
188 |
+
output = draw_posture_overlay(output, posture_cache["landmarks"])
|
189 |
+
return output, f"<div style='color: lime !important;'>Posture Analysis: {posture_cache['text']}</div>"
|
190 |
|
191 |
+
def analyze_emotion_current(image):
|
192 |
+
global emotion_cache
|
193 |
+
emotion_cache["counter"] += 1
|
194 |
+
current_frame = np.array(image)
|
195 |
+
if emotion_cache["counter"] % SKIP_RATE == 0 or emotion_cache["text"] is None:
|
196 |
+
text = compute_emotion_overlay(image)
|
197 |
+
emotion_cache["text"] = text
|
198 |
+
return current_frame, f"<div style='color: lime !important;'>Emotion Analysis: {emotion_cache['text']}</div>"
|
199 |
|
200 |
+
def analyze_objects_current(image):
|
201 |
+
global objects_cache
|
202 |
+
objects_cache["counter"] += 1
|
203 |
+
current_frame = np.array(image)
|
204 |
+
if objects_cache["counter"] % SKIP_RATE == 0 or objects_cache["boxes"] is None:
|
205 |
+
boxes, text, object_list_text = compute_objects_overlay(image)
|
206 |
+
objects_cache["boxes"] = boxes
|
207 |
+
objects_cache["text"] = text
|
208 |
+
objects_cache["object_list_text"] = object_list_text
|
209 |
+
output = current_frame.copy()
|
210 |
+
if objects_cache["boxes"]:
|
211 |
+
output = draw_boxes_overlay(output, objects_cache["boxes"], (255, 255, 0))
|
212 |
+
combined_text = f"Object Detection: {objects_cache['text']}<br>Details: {objects_cache['object_list_text']}"
|
213 |
+
return output, f"<div style='color: lime !important;'>{combined_text}</div>"
|
214 |
|
215 |
+
def analyze_faces_current(image):
|
216 |
+
global faces_cache
|
217 |
+
faces_cache["counter"] += 1
|
218 |
+
current_frame = np.array(image)
|
219 |
+
if faces_cache["counter"] % SKIP_RATE == 0 or faces_cache["boxes"] is None:
|
220 |
+
boxes, text = compute_faces_overlay(image)
|
221 |
+
faces_cache["boxes"] = boxes
|
222 |
+
faces_cache["text"] = text
|
223 |
+
output = current_frame.copy()
|
224 |
+
if faces_cache["boxes"]:
|
225 |
+
output = draw_boxes_overlay(output, faces_cache["boxes"], (0, 0, 255))
|
226 |
+
return output, f"<div style='color: lime !important;'>Face Detection: {faces_cache['text']}</div>"
|
227 |
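Each analyze_*_current wrapper shares one pattern: bump a counter, recompute only every SKIP_RATE calls (or when the cache is empty), and otherwise redraw from cached results. With SKIP_RATE = 1 as committed, every call recomputes; the isolated sketch below raises it to 3 to show the caching effect. expensive_detector is a hypothetical stand-in for the heavy model calls.

SKIP_RATE = 3  # the committed value is 1
cache = {"result": None, "counter": 0}

def expensive_detector(frame):
    return f"analysis of frame {frame}"  # stand-in for a heavy model call

def analyze(frame):
    cache["counter"] += 1
    if cache["counter"] % SKIP_RATE == 0 or cache["result"] is None:
        cache["result"] = expensive_detector(frame)
    return cache["result"]

for i in range(6):
    print(i, analyze(i))  # recomputes at i = 0, 2, 5; reuses the cache otherwise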
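The app's Gradio wiring in miniature: one gr.Interface per analysis, collected by gr.TabbedInterface, then rendered inside a gr.Blocks that carries the CSS. A runnable sketch with a placeholder function standing in for the analyze_*_current callbacks:

import gradio as gr

def echo(image):
    # Placeholder for an analyze_*_current function: image in, image + HTML out
    return image, "<div style='color: lime;'>ok</div>"

tab = gr.Interface(
    fn=echo,
    inputs=gr.Image(label="Upload an Image"),
    outputs=[gr.Image(type="numpy"), gr.HTML()],
    live=False,
)
tabbed = gr.TabbedInterface(interface_list=[tab], tab_names=["Demo"])
demo = gr.Blocks()
with demo:
    tabbed.render()

if __name__ == "__main__":
    demo.launch()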