Update app.py
app.py
CHANGED
@@ -1,8 +1,9 @@
# app.py — Multi-Modal Emotion AI (Text • Voice • Face)
# College-showcase build: fusion (weighted), PDF reports, safety + CBT tips,
# trends logging, auto face-crop, version banner, demo button, API endpoint (via api_name).
# Works on Hugging Face Spaces free CPU (GPU faster, but not required).

import os, json, datetime
from typing import Dict, List, Optional, Tuple

import numpy as np

@@ -15,34 +16,36 @@ import gradio as gr
from fpdf import FPDF
from transformers import pipeline

# =========================
# Config / metadata
# =========================
APP_NAME = "Multi-Modal Emotion AI"
APP_VERSION = "v1.3"
RUN_LOG = "runs.csv"
CHARTS_DIR = "charts"
os.makedirs(CHARTS_DIR, exist_ok=True)
if not os.path.exists(RUN_LOG):
    pd.DataFrame(columns=[
        "timestamp","text","text_top","voice_top","face_top","fused_top","pos_index"
    ]).to_csv(RUN_LOG, index=False)

# Public models (kept light; all public)
TEXT_MODEL = "SamLowe/roberta-base-go_emotions"   # text emotions (GoEmotions, 27)
VOICE_MODEL = "superb/wav2vec2-base-superb-er"    # voice emotion recognition
FACE_MODEL = "trpakov/vit-face-expression"        # facial expression (ViT)

# Pipelines (cached once)
text_pipe = pipeline("text-classification", model=TEXT_MODEL, top_k=None)
voice_pipe = pipeline("audio-classification", model=VOICE_MODEL, top_k=None)
face_pipe = pipeline("image-classification", model=FACE_MODEL, top_k=None)

# =========================
# Safety & CBT utilities
# =========================
RISK_TERMS = {
    "self_harm": ["kill myself","end it","suicide","self harm","cutting","overdose"],
    "violence": ["hurt them","attack","kill them","shoot","stab","revenge"]
}
DISTORTIONS = {
    "catastrophizing": ["ruined","disaster","worst ever","nothing will work","everything is over"],
    "all_or_nothing": ["always","never","completely","totally","entirely"],

@@ -79,13 +82,15 @@ def detect_distortions(text: str) -> List[str]:
def reframe_tips(names: List[str]) -> List[str]:
    return [REFRAMES[n] for n in names if n in REFRAMES]

# =========================
# Emotion helpers
# =========================
POSITIVE = set(["admiration","amusement","approval","gratitude","joy","love",
                "optimism","relief","pride","excitement"])
NEGATIVE = set(["anger","annoyance","disappointment","disapproval","disgust",
                "embarrassment","fear","grief","nervousness","remorse","sadness"])

def pipe_to_probs(outputs) -> Dict[str,float]:
    # pipelines return list[list[{"label","score"}]] when top_k=None
    if isinstance(outputs, list) and outputs and isinstance(outputs[0], list):
        outputs = outputs[0]

@@ -104,7 +109,7 @@ def positivity_index(prob: Optional[Dict[str,float]]) -> float:
    neg = sum(prob.get(k,0.0) for k in NEGATIVE)
    return round((pos - neg + 1)/2, 4)  # [-1,1] -> [0,1]

def merge_probs(dicts: List[Optional[Dict[str,float]]], weights: List[float]) -> Dict[str,float]:
    labels = set()
    for d in dicts:
        if d: labels |= set(d.keys())

@@ -119,7 +124,7 @@ def union_merge(dicts: List[Optional[Dict[str,float]]], weights: List[float]) ->
def bar_fig(prob: Dict[str,float], title: str):
    labels = list(prob.keys())
    vals = [prob[k]*100 for k in labels]
    fig, ax = plt.subplots(figsize=(7.2, 3.6))
    ax.bar(labels, vals)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Probability (%)")

@@ -135,14 +140,14 @@ def save_chart(prob: Dict[str,float], title: str, path: str):
    fig.savefig(path, dpi=160, bbox_inches="tight")
    plt.close(fig)

# =========================
# Computer vision: face crop
# =========================
HAAR = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
def crop_face(image_path: str) -> Image.Image:
    try:
        img = cv2.imread(image_path)
        if img is None:
            return Image.open(image_path).convert("RGB")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = HAAR.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5, minSize=(80,80))

@@ -153,13 +158,15 @@ def crop_face(image_path: str) -> Image.Image:
    except Exception:
        return Image.open(image_path).convert("RGB")

# =========================
# Per-modality inference
# =========================
def analyze_text(text: str, progress=gr.Progress()):
    if not text or not text.strip():
        raise gr.Error("Please enter text.")
    progress(0.15, desc="Analyzing text…")
    probs = pipe_to_probs(text_pipe(text))
    msg = f"**Top Text Emotion:** {top_item(probs)} | **Positivity Index:** {positivity_index(probs):.2f}"
    fig = bar_fig(probs, "Text Emotions")
    return msg, fig, json.dumps(probs)

@@ -167,7 +174,9 @@ def analyze_text(text: str):
def analyze_voice(audio_path: Optional[str], progress=gr.Progress()):
    if not audio_path:
        return "No audio provided.", None, None
    progress(0.15, desc="Analyzing voice…")
    probs = pipe_to_probs(voice_pipe(audio_path))
    msg = f"**Top Voice Emotion:** {top_item(probs)}"
    fig = bar_fig(probs, "Voice Emotions")
    return msg, fig, json.dumps(probs)

@@ -175,15 +184,18 @@ def analyze_voice(audio_path: Optional[str]):
def analyze_face(image_path: Optional[str], progress=gr.Progress()):
    if not image_path:
        return "No image provided.", None, None
    progress(0.15, desc="Detecting face…")
    face_img = crop_face(image_path)
    progress(0.6, desc="Analyzing facial expression…")
    probs = pipe_to_probs(face_pipe(face_img))
    msg = f"**Top Face Emotion:** {top_item(probs)}"
    fig = bar_fig(probs, "Face Emotions")
    return msg, fig, json.dumps(probs)

# =========================
# PDF Report
# =========================
def build_pdf(text_in: str,
              text_prob: Optional[Dict[str,float]],
              voice_prob: Optional[Dict[str,float]],

@@ -194,17 +206,18 @@ def build_pdf(text_in: str,

    # save charts
    paths = []
    if text_prob: save_chart(text_prob, "Text Emotions", os.path.join(CHARTS_DIR,"text.png")); paths.append(os.path.join(CHARTS_DIR,"text.png"))
    if voice_prob: save_chart(voice_prob, "Voice Emotions", os.path.join(CHARTS_DIR,"voice.png")); paths.append(os.path.join(CHARTS_DIR,"voice.png"))
    if face_prob: save_chart(face_prob, "Face Emotions", os.path.join(CHARTS_DIR,"face.png")); paths.append(os.path.join(CHARTS_DIR,"face.png"))
    if fused_prob: save_chart(fused_prob, "Fused Profile", os.path.join(CHARTS_DIR,"fused.png")); paths.append(os.path.join(CHARTS_DIR,"fused.png"))

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=16)
    pdf.cell(0, 10, f"{APP_NAME} — Report", ln=True, align="C")

    pdf.set_font("Arial", size=12)
    pdf.cell(0, 8, f"Version: {APP_VERSION}", ln=True)
    pdf.cell(0, 8, f"Timestamp: {datetime.datetime.now().isoformat(sep=' ', timespec='seconds')}", ln=True)
    pdf.multi_cell(0, 8, f"Input Text: {text_in or '(none)'}")
    pdf.ln(2)

@@ -226,18 +239,18 @@ def build_pdf(text_in: str,
        pdf.multi_cell(0, 7, f" • {t}")
    pdf.ln(2)

    for pth in paths:
        if os.path.exists(pth):
            pdf.image(pth, w=180)
            pdf.ln(4)

    out = "emotion_report.pdf"
    pdf.output(out)
    return out

# =========================
# Trends logging
# =========================
def log_run(row: dict):
    df = pd.read_csv(RUN_LOG)
    df.loc[len(df)] = row

@@ -248,37 +261,35 @@ def plot_trends():
        return None
    df = pd.read_csv(RUN_LOG)
    if df.empty: return None
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df["date"] = df["timestamp"].dt.date
    daily = df.groupby("date")["pos_index"].mean().reset_index()
    fig, ax = plt.subplots(figsize=(7.2,3.2))
    ax.plot(daily["date"], daily["pos_index"], marker="o")
    ax.set_ylim(0,1)
    ax.set_ylabel("Positivity Index (0–1)")
    ax.set_title("Positivity Trend")
    plt.xticks(rotation=25, ha="right"); plt.tight_layout()
    return fig

# =========================
# Fusion + API handler
# =========================
def fuse_and_report(text_json, voice_json, face_json, text_raw, w_text, w_voice, w_face):
    te = json.loads(text_json) if text_json else None
    ve = json.loads(voice_json) if voice_json else None
    fe = json.loads(face_json) if face_json else None
    ws = [w_text, w_voice, w_face]
    s = sum(ws) or 1.0
    weights = [w/s for w in ws]
    fused = merge_probs([te, ve, fe], weights) if (te or ve or fe) else None

    safety_level, safety_hits = safety_screen(text_raw or "")
    distos = detect_distortions(text_raw or "")
    tips = reframe_tips(distos)

    pdf_path = build_pdf(text_raw, te, ve, fe, fused, safety_level, safety_hits, distos, tips)

    pi_val = positivity_index(te)
    log_run({
        "timestamp": datetime.datetime.now().isoformat(sep=" ", timespec="seconds"),
@@ -290,26 +301,49 @@ def fuse_and_report(text_json, voice_json, face_json, text_raw, w_text, w_voice,
        "pos_index": pi_val
    })

    msg = f"**Fused Top:** {top_item(fused) or '(insufficient inputs)'}"
    msg += f" | Weights → Text:{weights[0]:.2f}, Voice:{weights[1]:.2f}, Face:{weights[2]:.2f}"
    plot = bar_fig(fused, "Fused Emotional Profile") if fused else None
    return msg, plot, pdf_path

# Optional text-only JSON API (returns distribution) — exposed as /run/text_api via api_name
def text_api(text: str):
    if not text or not text.strip():
        return {"error":"text required"}
    probs = pipe_to_probs(text_pipe(text))
    return {
        "top": top_item(probs),
        "positivity_index": positivity_index(probs),
        "distribution": probs
    }

# =========================
# Gradio UI
# =========================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Version banner
    gr.HTML(f"""
    <div style="padding:10px 12px;border:1px solid #eee;border-radius:10px;
                display:flex;justify-content:space-between;align-items:center;">
      <div><b>🧠 {APP_NAME}</b> — <span style="opacity:.8">Text • Voice • Face</span></div>
      <div style="opacity:.7">{APP_VERSION} · MIT · Made with 🤗</div>
    </div>
    """)
    gr.Markdown("Analyze emotions across **text, voice, and face**, detect **safety risks** and **cognitive distortions**, "
                "tune **fusion weights**, and download a **PDF report**. Audio/image are optional.")

    # State
    st_text_json = gr.State()
    st_voice_json = gr.State()
    st_face_json = gr.State()
    st_text_raw = gr.State()

    with gr.Row():
        demo_btn = gr.Button("Load demo text", variant="secondary")
        reset_btn = gr.Button("Reset Weights", variant="secondary")

    with gr.Tab("📝 Text"):
        t_in = gr.Textbox(label="Your text", lines=3, placeholder="How are you feeling today?")
        t_btn = gr.Button("Analyze Text", variant="primary")
        t_msg = gr.Markdown()
        t_plot = gr.Plot()
@@ -319,14 +353,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
        t_btn.click(_t_chain, inputs=t_in, outputs=[t_msg, t_plot, st_text_json, st_text_raw])

    with gr.Tab("🎤 Voice"):
        a_in = gr.Audio(sources=["microphone","upload"], type="filepath",
                        label="Record or upload audio (optional)")
        a_btn = gr.Button("Analyze Voice", variant="primary")
        a_msg = gr.Markdown()
        a_plot = gr.Plot()
        a_btn.click(analyze_voice, inputs=a_in, outputs=[a_msg, a_plot, st_voice_json])

    with gr.Tab("📷 Face"):
        f_in = gr.Image(type="filepath", label="Upload a face image (optional)")
        f_btn = gr.Button("Analyze Face", variant="primary")
        f_msg = gr.Markdown()
        f_plot = gr.Plot()
@@ -334,9 +369,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:

    with gr.Tab("🧩 Fusion + Report"):
        with gr.Row():
            w_text = gr.Slider(0, 1, value=0.50, step=0.05, label="Text weight")
            w_voice = gr.Slider(0, 1, value=0.30, step=0.05, label="Voice weight")
            w_face = gr.Slider(0, 1, value=0.20, step=0.05, label="Face weight")
        fuse_btn = gr.Button("Fuse & Generate PDF", variant="primary")
        fuse_msg = gr.Markdown()
        fuse_plot = gr.Plot()
@@ -344,20 +379,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
        fuse_btn.click(
            fuse_and_report,
            inputs=[st_text_json, st_voice_json, st_face_json, st_text_raw, w_text, w_voice, w_face],
            outputs=[fuse_msg, fuse_plot, fuse_pdf],
            api_name="predict"  # public API endpoint for this action
        )
        def _reset():
            return 0.50, 0.30, 0.20
        reset_btn.click(_reset, None, [w_text, w_voice, w_face])

    with gr.Tab("📈 Trends"):
        tr_btn = gr.Button("Refresh Positivity Trend")
        tr_plot = gr.Plot()
        tr_btn.click(plot_trends, inputs=None, outputs=tr_plot)

    with gr.Tab("🔌 API"):
        gr.Markdown("**Text-only JSON API** (for quick programmatic use).")
        api_in = gr.Textbox(label="Text")
        api_out = gr.JSON(label="Response")
        gr.Button("Run API").click(text_api, inputs=api_in, outputs=api_out, api_name="text_api")

    # Demo filler
    def load_demo():
        return "I’m stressed about deadlines but also excited for the opportunity."
    demo_btn.click(load_demo, None, t_in)

app = demo
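
For quick programmatic checks of the endpoints registered above via api_name ("text_api" and "predict"), the gradio_client package can be used once the Space is running. A minimal sketch, assuming gradio_client is installed and the Space is public; the Space id below is a placeholder, not the real repository id:

from gradio_client import Client

client = Client("OWNER/SPACE-NAME")  # placeholder Space id; substitute the actual repo id
# Text-only endpoint: should return the JSON payload built by text_api()
result = client.predict("I am nervous about my exam but hopeful.", api_name="/text_api")
print(result)  # e.g. {"top": ..., "positivity_index": ..., "distribution": {...}}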