Update app.py
app.py
CHANGED
@@ -1,9 +1,9 @@
-# app.py — Multi-Modal Emotion AI (Text • Voice • Face)
-#
-#
-#
+# app.py — Multi-Modal Emotion AI (Text • Voice • Face) — "Super Useful" Edition
+# Big upgrades: Fusion (weighted), PDF report, Safety + CBT, Coaching Plan generator,
+# Privacy controls (opt-out logging + clear data), Trends, Face auto-crop, Demo button,
+# Lightweight keyword "triggers", simple JSON API. CPU-friendly for Hugging Face Spaces.
 
-import os, json, datetime
+import os, json, datetime, re
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -17,10 +17,10 @@ from fpdf import FPDF
 from transformers import pipeline
 
 # =========================
-#
+# App config / metadata
 # =========================
 APP_NAME = "Multi-Modal Emotion AI"
-APP_VERSION = "
+APP_VERSION = "v2.0"
 RUN_LOG = "runs.csv"
 CHARTS_DIR = "charts"
 os.makedirs(CHARTS_DIR, exist_ok=True)
@@ -29,12 +29,13 @@ if not os.path.exists(RUN_LOG):
     "timestamp","text","text_top","voice_top","face_top","fused_top","pos_index"
 ]).to_csv(RUN_LOG, index=False)
 
-#
+# =========================
+# Public, lightweight models
+# =========================
 TEXT_MODEL = "SamLowe/roberta-base-go_emotions"  # text emotions (GoEmotions, 27)
 VOICE_MODEL = "superb/wav2vec2-base-superb-er"   # voice emotion recognition
 FACE_MODEL = "trpakov/vit-face-expression"       # facial expression (ViT)
 
-# Pipelines (cached once)
 text_pipe = pipeline("text-classification", model=TEXT_MODEL, top_k=None)
 voice_pipe = pipeline("audio-classification", model=VOICE_MODEL, top_k=None)
 face_pipe = pipeline("image-classification", model=FACE_MODEL, top_k=None)
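
pipe_to_probs is used throughout but defined outside the hunks shown above; a minimal sketch of what such a helper presumably does (an assumption, not this file's actual code) is to flatten the list of {label, score} dicts that top_k=None pipelines return into a plain label-to-probability dict:

from typing import Dict

def pipe_to_probs(preds) -> Dict[str, float]:
    # with top_k=None, transformers pipelines return every label with a score;
    # for some pipeline/input combinations the list is nested one level deeper
    if preds and isinstance(preds[0], list):
        preds = preds[0]
    return {p["label"]: float(p["score"]) for p in preds}
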
@@ -141,7 +142,7 @@ def save_chart(prob: Dict[str,float], title: str, path: str):
     plt.close(fig)
 
 # =========================
-# Computer vision: face crop
+# Computer vision: face auto-crop
 # =========================
 HAAR = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
 def crop_face(image_path: str) -> Image.Image:
@@ -158,24 +159,41 @@ def crop_face(image_path: str) -> Image.Image:
     except Exception:
         return Image.open(image_path).convert("RGB")
 
+# =========================
+# Lightweight keyword "triggers"
+# =========================
+STOP = set("""a an and the of to in is it for on with as that this i im i'm are was were be been being by from at or but if then so very really just kind quite rather about into up down over under again further once do does did doing have has had having not no nor only own same than too s t can will don should now""".split())
+def extract_triggers(text: str, top_k: int = 6) -> List[str]:
+    if not text: return []
+    tokens = re.findall(r"[a-zA-Z']{3,}", text.lower())
+    words = [w for w in tokens if w not in STOP]
+    if not words: return []
+    counts = {}
+    for w in words: counts[w] = counts.get(w,0) + 1
+    ranked = sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
+    return [w for w,_ in ranked[:top_k]]
+
 # =========================
 # Per-modality inference
 # =========================
 def analyze_text(text: str):
     if not text or not text.strip():
-        return gr.Error("Please enter text."), None, None
+        raise gr.Error("Please enter text.")
     with gr.Progress() as p:
-        p(0.
+        p(0.2, desc="Analyzing text emotions…")
         probs = pipe_to_probs(text_pipe(text))
         msg = f"**Top Text Emotion:** {top_item(probs)} | **Positivity Index:** {positivity_index(probs):.2f}"
         fig = bar_fig(probs, "Text Emotions")
-
+        distos = detect_distortions(text)
+        tips = reframe_tips(distos)
+        triggers = extract_triggers(text)
+        return msg, fig, json.dumps(probs), json.dumps({"distortions":distos,"tips":tips}), json.dumps({"triggers":triggers})
 
 def analyze_voice(audio_path: Optional[str]):
     if not audio_path:
         return "No audio provided.", None, None
     with gr.Progress() as p:
-        p(0.
+        p(0.2, desc="Analyzing voice emotions…")
         probs = pipe_to_probs(voice_pipe(audio_path))
         msg = f"**Top Voice Emotion:** {top_item(probs)}"
         fig = bar_fig(probs, "Voice Emotions")
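
As a quick usage illustration of the trigger extractor added above, the same ranking logic rerun standalone, with collections.Counter and a trimmed stop list standing in for the module's STOP set:

import re
from collections import Counter

DEMO_STOP = {"the", "and", "for", "but", "out"}  # abbreviated stand-in for STOP

def extract_triggers_demo(text: str, top_k: int = 6):
    tokens = re.findall(r"[a-zA-Z']{3,}", text.lower())
    words = [w for w in tokens if w not in DEMO_STOP]
    # rank by frequency (descending), then alphabetically for ties
    ranked = sorted(Counter(words).items(), key=lambda kv: (-kv[1], kv[0]))
    return [w for w, _ in ranked[:top_k]]

print(extract_triggers_demo("Deadlines, deadlines, deadlines: school deadlines stress me out."))
# -> ['deadlines', 'school', 'stress']
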
@@ -185,16 +203,66 @@ def analyze_face(image_path: Optional[str]):
     if not image_path:
         return "No image provided.", None, None
     with gr.Progress() as p:
-        p(0.
+        p(0.2, desc="Detecting face…")
         face_img = crop_face(image_path)
-        p(0.
+        p(0.7, desc="Analyzing facial expression…")
         probs = pipe_to_probs(face_pipe(face_img))
         msg = f"**Top Face Emotion:** {top_item(probs)}"
         fig = bar_fig(probs, "Face Emotions")
         return msg, fig, json.dumps(probs)
 
 # =========================
-#
+# Coaching plan (actionable)
+# =========================
+def generate_coaching_plan(text_prob, voice_prob, face_prob, disto_json, safety_level, triggers):
+    # Parse inputs
+    te = json.loads(text_prob) if text_prob else {}
+    ve = json.loads(voice_prob) if voice_prob else {}
+    fe = json.loads(face_prob) if face_prob else {}
+    distos = json.loads(disto_json).get("distortions", []) if disto_json else []
+    tips = json.loads(disto_json).get("tips", []) if disto_json else []
+    trig_list = json.loads(triggers).get("triggers", []) if triggers else []
+
+    # Determine dominant emotions (top across modalities)
+    def topk(d, k=3): return sorted(d.items(), key=lambda kv: kv[1], reverse=True)[:k]
+    dominant = [lab for lab,_ in topk(te,1)+topk(ve,1)+topk(fe,1)]
+    dom_set = set(dominant)
+
+    # Recipe library
+    exercises = []
+    if "fear" in dom_set or "nervousness" in dom_set:
+        exercises += ["4-7-8 breathing (5 rounds)", "Name 5-4-3-2-1 sensory objects", "Write worst/best/most-likely outcomes"]
+    if "sadness" in dom_set or "grief" in dom_set:
+        exercises += ["Text one friend to check in", "10-minute sunlight walk", "Gratitude list (3 items)"]
+    if "anger" in dom_set or "disgust" in dom_set:
+        exercises += ["2-minute cold water face splash", "Box breathing 4x4x4x4", "Delay response 20 minutes + draft message"]
+    if "joy" in dom_set or "excitement" in dom_set or "admiration" in dom_set:
+        exercises += ["Savoring: write 3 details you enjoyed", "Share win with someone", "Schedule repeat of the activity"]
+
+    # Personalization via distortions & triggers
+    reframe_block = tips[:3]
+    trigger_block = [f"Avoid or prepare for: {t}" for t in trig_list]
+
+    # Safety append
+    crisis_block = []
+    if safety_level == "high":
+        crisis_block = ["⚠ If in danger: contact local emergency services.",
+                        "US: 988 (Suicide & Crisis Lifeline)"]
+
+    # Structure plan
+    plan = {
+        "today": list(dict.fromkeys(exercises))[:5] or ["5-minute mindful breathing", "Short walk outside"],
+        "reframes": reframe_block or ["Reframe: What evidence supports/against my thought?"],
+        "triggers": trigger_block[:5],
+        "sleep": ["Wind-down alarm + devices off 30m before bed", "Keep consistent wake time"],
+        "movement": ["10-15m easy cardio or stretching"],
+        "social": ["Send a supportive text to someone"],
+        "safety": crisis_block
+    }
+    return json.dumps(plan, ensure_ascii=False)
+
+# =========================
+# PDF Report (analysis + plan)
 # =========================
 def build_pdf(text_in: str,
               text_prob: Optional[Dict[str,float]],
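
For a run whose dominant emotion is fear, with no distortions detected and "deadlines" as a sample trigger, generate_coaching_plan returns JSON of roughly this shape (every string below comes from the recipe library above; the trigger value is illustrative):

{
  "today": ["4-7-8 breathing (5 rounds)", "Name 5-4-3-2-1 sensory objects", "Write worst/best/most-likely outcomes"],
  "reframes": ["Reframe: What evidence supports/against my thought?"],
  "triggers": ["Avoid or prepare for: deadlines"],
  "sleep": ["Wind-down alarm + devices off 30m before bed", "Keep consistent wake time"],
  "movement": ["10-15m easy cardio or stretching"],
  "social": ["Send a supportive text to someone"],
  "safety": []
}
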
@@ -202,7 +270,8 @@ def build_pdf(text_in: str,
               face_prob: Optional[Dict[str,float]],
               fused_prob: Optional[Dict[str,float]],
               safety_level: str, safety_hits: Dict[str,List[str]],
-
+              distos: List[str], tips: List[str],
+              plan_json: Optional[str]) -> str:
 
     # save charts
     paths = []
@@ -211,6 +280,7 @@ def build_pdf(text_in: str,
     if face_prob: save_chart(face_prob, "Face Emotions", os.path.join(CHARTS_DIR,"face.png")); paths.append(os.path.join(CHARTS_DIR,"face.png"))
     if fused_prob: save_chart(fused_prob, "Fused Profile", os.path.join(CHARTS_DIR,"fused.png")); paths.append(os.path.join(CHARTS_DIR,"fused.png"))
 
+    # build PDF
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", size=16)
@@ -231,8 +301,8 @@ def build_pdf(text_in: str,
     pdf.set_text_color(0,0,0)
     pdf.ln(2)
 
-    if
-    pdf.cell(0, 8, f"Cognitive distortions: {', '.join(
+    if distos:
+        pdf.cell(0, 8, f"Cognitive distortions: {', '.join(distos)}", ln=True)
     if tips:
         pdf.cell(0, 8, "Reframe suggestions:", ln=True)
         for t in tips:
@@ -244,14 +314,34 @@ def build_pdf(text_in: str,
         pdf.image(pth, w=180)
         pdf.ln(4)
 
+    # Coaching plan section
+    if plan_json:
+        try:
+            plan = json.loads(plan_json)
+            pdf.set_font("Arial", size=13)
+            pdf.cell(0, 10, "Personalized Coaching Plan", ln=True)
+            pdf.set_font("Arial", size=12)
+            for sec in ["today","reframes","triggers","movement","sleep","social","safety"]:
+                items = plan.get(sec, [])
+                if not items: continue
+                title = sec.capitalize()
+                pdf.cell(0, 8, f"{title}:", ln=True)
+                for i in items:
+                    pdf.multi_cell(0, 7, f"- {i}".encode("latin-1", "replace").decode("latin-1"))  # keep core-font text latin-1-safe
+                pdf.ln(1)
+        except Exception:
+            pass
+
     out = "emotion_report.pdf"
     pdf.output(out)
     return out
 
 # =========================
-# Trends
+# Trends & data controls
 # =========================
-def log_run(row: dict):
+def log_run(row: dict, enable_logging: bool):
+    if not enable_logging:
+        return
     df = pd.read_csv(RUN_LOG)
     df.loc[len(df)] = row
     df.to_csv(RUN_LOG, index=False)
@@ -272,10 +362,14 @@ def plot_trends():
     plt.xticks(rotation=25, ha="right"); plt.tight_layout()
     return fig
 
+def clear_history():
+    pd.DataFrame(columns=["timestamp","text","text_top","voice_top","face_top","fused_top","pos_index"]).to_csv(RUN_LOG, index=False)
+    return "History cleared."
+
 # =========================
-# Fusion +
+# Fusion + end-to-end actions
 # =========================
-def
+def fuse_and_plan(text_json, voice_json, face_json, text_raw, w_text, w_voice, w_face, disto_json, triggers_json):
     te = json.loads(text_json) if text_json else None
     ve = json.loads(voice_json) if voice_json else None
     fe = json.loads(face_json) if face_json else None
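
merge_probs, called in the next hunk, is defined outside this diff. A plausible late-fusion sketch consistent with the call merge_probs([te, ve, fe], weights) (an assumption about the helper, not its actual source) renormalizes the weights over whichever modalities produced a distribution, then takes a weighted sum per label:

def merge_probs(prob_dicts, weights):
    # keep only modalities that actually produced a distribution
    pairs = [(p, w) for p, w in zip(prob_dicts, weights) if p]
    if not pairs:
        return None
    total_w = sum(w for _, w in pairs) or 1.0
    fused = {}
    for probs, w in pairs:
        for label, score in probs.items():
            fused[label] = fused.get(label, 0.0) + (w / total_w) * score
    # normalize so the fused distribution sums to 1
    s = sum(fused.values()) or 1.0
    return {k: v / s for k, v in fused.items()}
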
@@ -285,81 +379,109 @@ def fuse_and_report(text_json, voice_json, face_json, text_raw, w_text, w_voice,
     fused = merge_probs([te, ve, fe], weights) if (te or ve or fe) else None
 
     safety_level, safety_hits = safety_screen(text_raw or "")
-    distos =
-    tips =
+    distos = json.loads(disto_json).get("distortions", []) if disto_json else []
+    tips = json.loads(disto_json).get("tips", []) if disto_json else []
+    plan = generate_coaching_plan(text_json, voice_json, face_json, disto_json, safety_level, triggers_json)
 
-
+    msg = f"**Fused Top:** {top_item(fused) or '(insufficient inputs)'}"
+    msg += f" | Weights → Text:{weights[0]:.2f}, Voice:{weights[1]:.2f}, Face:{weights[2]:.2f}"
+    plot = bar_fig(fused, "Fused Emotional Profile") if fused else None
+    return msg, plot, json.dumps({"safety":safety_level,"hits":safety_hits}), plan
 
-
+def full_report(text_json, voice_json, face_json, text_raw, safety_json, disto_json, plan_json, enable_logging):
+    te = json.loads(text_json) if text_json else None
+    ve = json.loads(voice_json) if voice_json else None
+    fe = json.loads(face_json) if face_json else None
+    safety = json.loads(safety_json) if safety_json else {"safety":"low","hits":{}}
+    distos = json.loads(disto_json).get("distortions", []) if disto_json else []
+    tips = json.loads(disto_json).get("tips", []) if disto_json else []
+
+    pdf_path = build_pdf(
+        text_in=text_raw,
+        text_prob=te, voice_prob=ve, face_prob=fe,
+        fused_prob=None,  # fused chart already shown in the UI; omit to keep the PDF lighter
+        safety_level=safety.get("safety","low"), safety_hits=safety.get("hits",{}),
+        distos=distos, tips=tips,
+        plan_json=plan_json
+    )
+
+    # Log compact row
     log_run({
         "timestamp": datetime.datetime.now().isoformat(sep=" ", timespec="seconds"),
         "text": text_raw or "",
         "text_top": top_item(te),
         "voice_top": top_item(ve),
         "face_top": top_item(fe),
-        "fused_top":
-        "pos_index":
-    })
+        "fused_top": "",  # could compute again if needed
+        "pos_index": positivity_index(te) if te else ""
+    }, enable_logging=enable_logging)
 
-
-    msg += f" | Weights → Text:{weights[0]:.2f}, Voice:{weights[1]:.2f}, Face:{weights[2]:.2f}"
-    plot = bar_fig(fused, "Fused Emotional Profile") if fused else None
-    return msg, plot, pdf_path
+    return pdf_path
 
-#
+# Simple text-only JSON API (for quick programmatic use)
 def text_api(text: str):
     if not text or not text.strip():
         return {"error":"text required"}
     probs = pipe_to_probs(text_pipe(text))
+    distos = detect_distortions(text)
     return {
         "top": top_item(probs),
         "positivity_index": positivity_index(probs),
-        "distribution": probs
+        "distribution": probs,
+        "distortions": distos
     }
 
 # =========================
 # Gradio UI
 # =========================
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    #
+    # Header
    gr.HTML(f"""
    <div style="padding:10px 12px;border:1px solid #eee;border-radius:10px;
                display:flex;justify-content:space-between;align-items:center;">
-      <div><b>🧠 {APP_NAME}</b> — <span style="opacity:.8">Text • Voice • Face</span></div>
+      <div><b>🧠 {APP_NAME}</b> — <span style="opacity:.8">Text • Voice • Face • Coaching</span></div>
      <div style="opacity:.7">{APP_VERSION} · MIT · Made with 🤗</div>
    </div>
    """)
    gr.Markdown("Analyze emotions across **text, voice, and face**, detect **safety risks** and **cognitive distortions**, "
-                "
-
-    #
-    st_text_json
-    st_voice_json
-    st_face_json
-    st_text_raw
+                "generate a **personalized coaching plan**, and download a **PDF report**. Audio/image optional.")
+
+    # App state
+    st_text_json = gr.State()
+    st_voice_json = gr.State()
+    st_face_json = gr.State()
+    st_text_raw = gr.State()
+    st_disto_json = gr.State()
+    st_triggers = gr.State()
+    st_safety_json = gr.State()
+    st_plan_json = gr.State()
 
    with gr.Row():
        demo_btn = gr.Button("Load demo text", variant="secondary")
-
+        enable_log = gr.Checkbox(value=True, label="Enable logging for Trends (privacy toggle)")
 
+    # ---------- Text ----------
    with gr.Tab("📝 Text"):
        t_in = gr.Textbox(label="Your text", lines=3, placeholder="How are you feeling today?")
        t_btn = gr.Button("Analyze Text", variant="primary")
        t_msg = gr.Markdown()
        t_plot = gr.Plot()
+        t_dist = gr.JSON(label="CBT Distortions & Tips")
+        t_trig = gr.JSON(label="Possible Triggers")
        def _t_chain(txt):
-            msg, fig, j = analyze_text(txt)
-            return msg, fig, j, txt
-        t_btn.click(_t_chain, inputs=t_in, outputs=[t_msg, t_plot, st_text_json, st_text_raw])
+            msg, fig, j, djson, trig = analyze_text(txt)
+            return msg, fig, j, txt, djson, trig
+        t_btn.click(_t_chain, inputs=t_in, outputs=[t_msg, t_plot, st_text_json, st_text_raw, st_disto_json, st_triggers])
 
+    # ---------- Voice ----------
    with gr.Tab("🎤 Voice"):
-        a_in = gr.Audio(sources=["microphone","upload"], type="filepath",
-                        label="Record or upload audio (optional)")
+        a_in = gr.Audio(sources=["microphone","upload"], type="filepath", label="Record or upload audio (optional)")
        a_btn = gr.Button("Analyze Voice", variant="primary")
        a_msg = gr.Markdown()
        a_plot = gr.Plot()
        a_btn.click(analyze_voice, inputs=a_in, outputs=[a_msg, a_plot, st_voice_json])
 
+    # ---------- Face ----------
    with gr.Tab("📷 Face"):
        f_in = gr.Image(type="filepath", label="Upload a face image (optional)")
        f_btn = gr.Button("Analyze Face", variant="primary")
@@ -367,39 +489,60 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
        f_plot = gr.Plot()
        f_btn.click(analyze_face, inputs=f_in, outputs=[f_msg, f_plot, st_face_json])
 
-
+    # ---------- Fusion & Plan ----------
+    with gr.Tab("🧩 Fusion + Plan"):
        with gr.Row():
-            w_text = gr.Slider(0, 1, value=0.
+            w_text = gr.Slider(0, 1, value=0.55, step=0.05, label="Text weight")
            w_voice = gr.Slider(0, 1, value=0.30, step=0.05, label="Voice weight")
-            w_face = gr.Slider(0, 1, value=0.
-        fuse_btn = gr.Button("Fuse &
+            w_face = gr.Slider(0, 1, value=0.15, step=0.05, label="Face weight")
+        fuse_btn = gr.Button("Fuse & Build Coaching Plan", variant="primary")
        fuse_msg = gr.Markdown()
        fuse_plot = gr.Plot()
-
+        safety_box = gr.JSON(label="Safety screen")
+        plan_box = gr.JSON(label="Coaching plan (today + reframes + triggers)")
        fuse_btn.click(
-
-            inputs=[st_text_json, st_voice_json, st_face_json, st_text_raw, w_text, w_voice, w_face],
-            outputs=[fuse_msg, fuse_plot,
-            api_name="
+            fuse_and_plan,
+            inputs=[st_text_json, st_voice_json, st_face_json, st_text_raw, w_text, w_voice, w_face, st_disto_json, st_triggers],
+            outputs=[fuse_msg, fuse_plot, st_safety_json, st_plan_json],
+            api_name="fuse"  # expose endpoint
+        ).then(
+            lambda s,p: (s,p), inputs=[st_safety_json, st_plan_json], outputs=[safety_box, plan_box]
        )
-        def _reset():
-            return 0.50, 0.30, 0.20
-        reset_btn.click(_reset, None, [w_text, w_voice, w_face])
 
-
+    # ---------- Report ----------
+    with gr.Tab("📄 Report"):
+        report_btn = gr.Button("Generate PDF Report")
+        pdf_out = gr.File(label="Download Report")
+        report_btn.click(
+            full_report,
+            inputs=[st_text_json, st_voice_json, st_face_json, st_text_raw, st_safety_json, st_disto_json, st_plan_json, enable_log],
+            outputs=pdf_out,
+            api_name="report"
+        )
+
+    # ---------- Trends & Data ----------
+    with gr.Tab("📈 Trends & Data"):
        tr_btn = gr.Button("Refresh Positivity Trend")
        tr_plot = gr.Plot()
        tr_btn.click(plot_trends, inputs=None, outputs=tr_plot)
-
+        with gr.Row():
+            dl_btn = gr.Button("Download CSV")
+            dl_out = gr.File()
+            clr_btn = gr.Button("Clear History", variant="stop")
+            clr_msg = gr.Markdown()
+        dl_btn.click(lambda: RUN_LOG if os.path.exists(RUN_LOG) else None, inputs=None, outputs=dl_out)
+        clr_btn.click(lambda: clear_history(), inputs=None, outputs=clr_msg)
+
+    # ---------- API (text-only quick JSON) ----------
    with gr.Tab("🔌 API"):
-        gr.Markdown("**Text-only JSON API** (
+        gr.Markdown("**Text-only JSON API** (quick programmatic use).")
        api_in = gr.Textbox(label="Text")
        api_out = gr.JSON(label="Response")
        gr.Button("Run API").click(text_api, inputs=api_in, outputs=api_out, api_name="text_api")
 
    # Demo filler
    def load_demo():
-        return "I
+        return "I'm overwhelmed by school deadlines, but I'm also excited for the new opportunities."
    demo_btn.click(load_demo, None, t_in)
 
 app = demo
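
Because fuse, report, and text_api are exposed through api_name, the Space can also be driven programmatically with gradio_client; a minimal sketch (the Space id below is a placeholder):

from gradio_client import Client

client = Client("your-username/your-space")  # placeholder Space id
result = client.predict(
    "I keep thinking I'll fail the interview tomorrow.",
    api_name="/text_api",
)
print(result)  # {"top": ..., "positivity_index": ..., "distribution": {...}, "distortions": [...]}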