ans123 committed · Commit 92da2a8 · verified · 1 Parent(s): 64984be

Create app.py

Files changed (1)
  1. app.py +795 -0
app.py ADDED
@@ -0,0 +1,795 @@
+ import gradio as gr
+ import faiss
+ import numpy as np
+ import os
+ import json
+ import datetime
+ import uuid
+ import asyncio
+ import time
+ from typing import Dict, List, Optional, Tuple, Any
+ from urllib.parse import urlparse
+ from urllib.robotparser import RobotFileParser
+
+ import pandas as pd
+ import ollama
+ from duckduckgo_search import DDGS
+ import requests
+
+ # crawl4ai setup
+ try:
+     from crawl4ai import AsyncWebCrawler, BrowserConfig, CacheMode, CrawlerRunConfig
+     from crawl4ai.content_filter_strategy import BM25ContentFilter
+     from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
+     CRAWL4AI_AVAILABLE = True
+ except ImportError:
+     print("WARNING: crawl4ai library not found or failed to import. Resource finding will be disabled.")
+     print("Install it: pip install 'crawl4ai[playwright]' and run 'playwright install --with-deps'")
+     CRAWL4AI_AVAILABLE = False
+
+
+ # --- Configuration ---
+ OLLAMA_MODEL = "llama3:8b"  # or your preferred model
+ FAISS_INDEX_FILE = "faiss_index.index"
+ FAISS_METADATA_FILE = "faiss_metadata.json"
+ USER_DATA_DIR = "user_data"
+ COMMUNITY_FILE = "community_posts.json"
+ os.makedirs(USER_DATA_DIR, exist_ok=True)
+
+ # FAISS vector dimension (must match the Ollama embedding model).
+ # nomic-embed-text: 768
+ VECTOR_DIMENSION = 768
+
+
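+ # Swapping the embedding model requires a matching dimension here and a
+ # rebuilt index; per the respective model cards: all-minilm -> 384,
+ # mxbai-embed-large -> 1024.
+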
+ # --- System Prompts (same as before) ---
+ EMOTION_ANALYSIS_PROMPT = """..."""  # keep as is
+ GROWTH_PLAN_PROMPT = """..."""  # keep as is
+ RESOURCE_SYNTHESIS_PROMPT = """..."""  # keep as is (adjust if needed for FAISS context)
+ COMMUNITY_SUGGESTION_PROMPT = """..."""  # keep as is
+
+ # --- Data Persistence Functions ---
+
+ def load_user_data(username: str) -> Dict:
+     """Loads data for a specific user from a JSON file."""
+     if not username:
+         return {"entries": [], "plans": {}, "resources": {}, "profile": {}}
+     filepath = os.path.join(USER_DATA_DIR, f"{username}.json")
+     if os.path.exists(filepath):
+         try:
+             with open(filepath, 'r') as f:
+                 return json.load(f)
+         except json.JSONDecodeError:
+             print(f"Warning: Corrupted data file for user {username}. Starting fresh.")
+             return {"entries": [], "plans": {}, "resources": {}, "profile": {}}  # return default on error
+     else:
+         # Create the initial structure for a new user.
+         return {"entries": [], "plans": {}, "resources": {}, "profile": {"username": username, "joined": datetime.datetime.now().isoformat(), "points": 0, "goals": {}}}
+
+ def save_user_data(username: str, data: Dict):
+     """Saves data for a specific user to a JSON file."""
+     if not username:
+         return
+     filepath = os.path.join(USER_DATA_DIR, f"{username}.json")
+     with open(filepath, 'w') as f:
+         json.dump(data, f, indent=4)
+
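+ # For reference, a saved user file under user_data/ looks roughly like this
+ # (illustrative, not a schema enforced anywhere in the code):
+ # {
+ #     "entries":   [{"id": "...", "timestamp": "...", "journal_entry": "...", "analysis": {...}}],
+ #     "plans":     {"<entry-id>": {...}},
+ #     "resources": {"<entry-id>": {...}},
+ #     "profile":   {"username": "...", "joined": "...", "points": 0, "goals": {}}
+ # }
+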
+ def load_community_posts() -> List[Dict]:
+     """Loads community posts from a JSON file."""
+     if os.path.exists(COMMUNITY_FILE):
+         try:
+             with open(COMMUNITY_FILE, 'r') as f:
+                 return json.load(f)
+         except json.JSONDecodeError:
+             print("Warning: Community posts file corrupted. Starting fresh.")
+             return []
+     else:
+         return []
+
+ def save_community_posts(posts: List[Dict]):
+     """Saves community posts to a JSON file."""
+     with open(COMMUNITY_FILE, 'w') as f:
+         json.dump(posts, f, indent=4)
+
+ # --- FAISS and Embedding Functions ---
+
+ def get_ollama_embeddings(texts: List[str], model_name: str = "nomic-embed-text:latest") -> Tuple[List[List[float]], bool]:
+     """Gets embeddings from Ollama. Returns embeddings and a success flag."""
+     ollama_api_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + "/api/embeddings"
+     embeddings = []
+     all_successful = True
+     max_retries = 2
+     retry_delay = 1
+
+     for text in texts:
+         if not text or not isinstance(text, str):
+             print(f"Warning: Skipping embedding for invalid input: {text}")
+             embeddings.append([])
+             all_successful = False
+             continue
+
+         current_embedding = []
+         for attempt in range(max_retries):
+             try:
+                 response = requests.post(ollama_api_url, json={"model": model_name, "prompt": text}, headers={"Content-Type": "application/json"})
+                 response.raise_for_status()
+                 result = response.json()
+                 if "embedding" in result and len(result["embedding"]) == VECTOR_DIMENSION:
+                     current_embedding = result["embedding"]
+                     break  # success for this text
+                 else:
+                     print(f"Warning: Ollama response issue (attempt {attempt+1}) for text: {text[:50]}... Response: {result}")
+                     if attempt == max_retries - 1:
+                         all_successful = False
+             except Exception as e:
+                 print(f"Error getting Ollama embedding (Attempt {attempt+1}/{max_retries}): {e}")
+                 if attempt < max_retries - 1:
+                     time.sleep(retry_delay)
+                 else:
+                     all_successful = False
+
+         embeddings.append(current_embedding)  # append embedding, or empty list if all attempts failed
+
+     # Replace empty lists with zero vectors so downstream shapes stay consistent.
+     final_embeddings = []
+     for emb in embeddings:
+         if emb:
+             final_embeddings.append(emb)
+         else:
+             print("Warning: Replacing failed embedding with zero vector.")
+             final_embeddings.append([0.0] * VECTOR_DIMENSION)
+             all_successful = False  # mark overall success as False if any failed
+
+     return final_embeddings, all_successful
+
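+ # Illustrative usage (requires a running Ollama server with the
+ # nomic-embed-text model pulled; values below are hypothetical):
+ # embs, ok = get_ollama_embeddings(["I felt anxious before the meeting"])
+ # len(embs) == 1 and len(embs[0]) == VECTOR_DIMENSION
+ # ok is False if any text fell back to a zero vector.
+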
+ def create_or_load_faiss_index() -> Tuple[Optional[faiss.Index], Dict[int, Dict]]:
+     """Loads the FAISS index and metadata, or creates empty ones."""
+     index = None
+     metadata = {}
+     if os.path.exists(FAISS_INDEX_FILE) and os.path.exists(FAISS_METADATA_FILE):
+         try:
+             print(f"Loading FAISS index from {FAISS_INDEX_FILE}")
+             index = faiss.read_index(FAISS_INDEX_FILE)
+             print(f"Loading FAISS metadata from {FAISS_METADATA_FILE}")
+             with open(FAISS_METADATA_FILE, 'r') as f:
+                 # Load metadata, converting string keys back to int.
+                 metadata_str_keys = json.load(f)
+                 metadata = {int(k): v for k, v in metadata_str_keys.items()}
+             print(f"Loaded index with {index.ntotal} vectors and {len(metadata)} metadata entries.")
+             # Consistency check (optional but recommended).
+             if index.ntotal != len(metadata):
+                 print(f"WARNING: FAISS index size ({index.ntotal}) != metadata size ({len(metadata)}). Rebuilding might be needed.")
+                 # Decide on a recovery strategy: clear both, try to align, etc.
+                 # Simplest: clear both and start over if inconsistent:
+                 # index = None
+                 # metadata = {}
+         except Exception as e:
+             print(f"Error loading FAISS data: {e}. Starting fresh.")
+             index = None
+             metadata = {}
+
+     if index is None:
+         print("Creating new FAISS index.")
+         # IndexFlatL2 does exact L2-distance search; IndexIVFFlat is faster for
+         # large datasets but needs a training pass.
+         index = faiss.IndexFlatL2(VECTOR_DIMENSION)
+         metadata = {}
+
+     return index, metadata
+
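+ # If the index ever grows large, an IVF index could be swapped in. Hedged
+ # sketch (nlist=100 is an arbitrary illustrative choice; IVF must be trained
+ # on representative vectors before any are added):
+ # quantizer = faiss.IndexFlatL2(VECTOR_DIMENSION)
+ # ivf = faiss.IndexIVFFlat(quantizer, VECTOR_DIMENSION, 100)
+ # ivf.train(training_vectors)
+ # ivf.add(training_vectors)
+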
+ def save_faiss_index(index: faiss.Index, metadata: Dict[int, Dict]):
+     """Saves the FAISS index and metadata to disk."""
+     try:
+         print(f"Saving FAISS index to {FAISS_INDEX_FILE} ({index.ntotal} vectors)")
+         faiss.write_index(index, FAISS_INDEX_FILE)
+         print(f"Saving FAISS metadata to {FAISS_METADATA_FILE} ({len(metadata)} entries)")
+         with open(FAISS_METADATA_FILE, 'w') as f:
+             # Store metadata with string keys for JSON compatibility.
+             json.dump({str(k): v for k, v in metadata.items()}, f, indent=4)
+         print("FAISS data saved successfully.")
+     except Exception as e:
+         print(f"Error saving FAISS data: {e}")
+         gr.Warning(f"Failed to save resource index: {e}")
+
+
+ def add_to_faiss(index: faiss.Index, metadata: Dict[int, Dict], content_list: List[Dict]) -> Tuple[faiss.Index, Dict[int, Dict], int]:
+     """Adds crawled content to the FAISS index and metadata."""
+     # Filter items first so the text and URL lists stay aligned.
+     items_with_text = [item for item in content_list if item.get('markdown')]
+     texts_to_embed = [item['markdown'] for item in items_with_text]
+     urls = [item.get('url', 'Unknown URL') for item in items_with_text]
+
+     if not texts_to_embed:
+         print("No text content provided to add_to_faiss.")
+         return index, metadata, 0
+
+     print(f"Generating embeddings for {len(texts_to_embed)} chunks...")
+     embeddings, success = get_ollama_embeddings(texts_to_embed)
+     if not success:
+         gr.Warning("Some embeddings failed to generate. Results might be incomplete.")
+
+     # Note: get_ollama_embeddings zero-fills failures, so entries are normally
+     # all non-empty; the filter below is a defensive guard.
+     valid_embeddings = np.array([emb for emb in embeddings if emb], dtype='float32')
+
+     if valid_embeddings.shape[0] == 0:
+         print("No valid embeddings generated.")
+         return index, metadata, 0
+
+     # Add vectors to the FAISS index.
+     start_index = index.ntotal
+     index.add(valid_embeddings)
+     print(f"Added {valid_embeddings.shape[0]} vectors to FAISS index. New total: {index.ntotal}")
+
+     # Add the corresponding metadata; map original positions onto new row ids in order.
+     added_count = 0
+     original_indices_added = [i for i, emb in enumerate(embeddings) if emb]
+
+     for i, original_idx in enumerate(original_indices_added):
+         faiss_id = start_index + i
+         metadata[faiss_id] = {
+             "text": texts_to_embed[original_idx],
+             "url": urls[original_idx],
+             # Other info (e.g. page title) could be stored here if the crawler provides it.
+         }
+         added_count += 1
+
+     print(f"Added metadata for {added_count} entries.")
+     return index, metadata, added_count
+
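+ # Each FAISS row id maps onto one metadata record, e.g. (illustrative values):
+ # metadata[0] == {"text": "...page markdown chunk...", "url": "https://example.org/article"}
+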
+ def search_faiss(index: faiss.Index, metadata: Dict[int, Dict], query_text: str, k: int = 5) -> List[Dict]:
+     """Searches the FAISS index and returns relevant metadata entries."""
+     if not query_text or index.ntotal == 0:
+         return []
+
+     print(f"Generating embedding for query: {query_text[:50]}...")
+     query_embedding, success = get_ollama_embeddings([query_text])
+
+     if not success or not query_embedding[0]:
+         gr.Warning("Failed to generate embedding for search query.")
+         return []
+
+     query_vector = np.array(query_embedding, dtype='float32')  # shape (1, VECTOR_DIMENSION)
+
+     print(f"Searching FAISS index (k={k})...")
+     try:
+         # D: distances, I: indices (row ids).
+         distances, indices = index.search(query_vector, k)
+         results = []
+         if indices.size > 0:
+             for i, faiss_id in enumerate(indices[0]):  # indices is a 2D array [[id1, id2, ...]]
+                 if faiss_id != -1:  # -1 indicates no neighbor found
+                     entry = metadata.get(int(faiss_id))
+                     if entry:
+                         entry_with_score = entry.copy()
+                         # L2 distance: lower is better. Convert to a similarity score if needed.
+                         entry_with_score['score'] = float(distances[0][i])
+                         results.append(entry_with_score)
+                     else:
+                         print(f"Warning: FAISS ID {faiss_id} not found in metadata.")
+         print(f"Found {len(results)} results from FAISS.")
+         return results
+     except Exception as e:
+         print(f"Error during FAISS search: {e}")
+         gr.Warning(f"FAISS search failed: {e}")
+         return []
+
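+ # Note: with IndexFlatL2 the 'score' above is a squared L2 distance, so
+ # smaller means closer. If a bounded similarity is ever needed for display,
+ # one common ad-hoc transform is: similarity = 1.0 / (1.0 + distance)
+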
+ # --- LLM Interaction Functions ---
+ def call_ollama_chat(system_prompt: str, user_prompt: str) -> Dict:
+     messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+     try:
+         response = ollama.chat(model=OLLAMA_MODEL, messages=messages)
+         response_content = response['message']['content']
+         try:
+             # Strip potential markdown code-block fences before parsing.
+             if response_content.startswith("```json"):
+                 response_content = response_content[7:]
+             if response_content.endswith("```"):
+                 response_content = response_content[:-3]
+             parsed_json = json.loads(response_content.strip())
+             return parsed_json
+         except json.JSONDecodeError:
+             print(f"LLM response for '{system_prompt[:30]}...' was not valid JSON.")
+             return {"raw_response": response_content}
+     except Exception as e:
+         print(f"Error calling Ollama: {e}")
+         return {"error": str(e)}
+
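+ # Note: ollama.chat() also accepts format="json", which asks the model for
+ # strict JSON output and can make the fence-stripping above unnecessary
+ # (still model-dependent, so the fallback parsing is kept).
+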
+ # Specific LLM tasks
+ def analyze_emotion(journal_entry: str) -> Dict:
+     return call_ollama_chat(EMOTION_ANALYSIS_PROMPT, journal_entry)
+
+ def generate_growth_plan(emotion_analysis: Dict, user_goals: Dict) -> Dict:
+     input_data = {"emotion_analysis": emotion_analysis, "user_goals": user_goals}
+     return call_ollama_chat(GROWTH_PLAN_PROMPT, json.dumps(input_data, indent=2))
+
+ def synthesize_resources_llm(emotion_analysis: Dict, growth_plan: Optional[Dict], search_results: List[Dict]) -> Dict:
+     """Synthesizes resources with the LLM from FAISS search results."""
+     if not search_results:
+         return {"error": "No search results provided for synthesis."}
+     # Extract text and URLs from the search results.
+     snippets = [f"--- Content from {res.get('url', 'Unknown')} ---\n{res.get('text', '')}" for res in search_results]
+     source_urls = list(set(res.get('url', 'Unknown') for res in search_results))
+     combined_content = "\n\n".join(snippets)
+
+     input_data = {
+         "emotion_analysis": emotion_analysis,
+         "growth_plan": growth_plan if growth_plan else "No specific growth plan available.",
+         "web_content_snippets": combined_content,
+         "source_urls_provided": source_urls
+     }
+     synthesis_result = call_ollama_chat(RESOURCE_SYNTHESIS_PROMPT, json.dumps(input_data, indent=2))
+     # Add source URLs if the LLM didn't include them.
+     if isinstance(synthesis_result, dict) and 'source_urls' not in synthesis_result:
+         synthesis_result['source_urls'] = source_urls
+     return synthesis_result
+
+ def get_community_suggestions(emotion_analysis: Dict, growth_plan: Optional[Dict]) -> Dict:
+     input_data = {"emotion_analysis": emotion_analysis, "growth_plan": growth_plan}
+     return call_ollama_chat(COMMUNITY_SUGGESTION_PROMPT, json.dumps(input_data, indent=2))
+
+
+ # --- Web Search and Crawl Functions ---
+ def get_web_urls(search_term: str, num_results: int = 3) -> List[str]:
+     allowed_urls = []
+     try:
+         enhanced_search = f"{search_term} emotional regulation coping strategies therapy techniques"
+         print(f"Searching DDG for: {enhanced_search}")
+         results = DDGS().text(enhanced_search, max_results=num_results * 2)  # fetch slightly more than needed
+         urls = [result["href"] for result in results if result.get("href")]
+         # Basic filtering: skip PDFs, deduplicate by domain, drop social/media sites.
+         filtered_urls = []
+         seen_domains = set()
+         discard_domains = {"youtube.com", "amazon.com", "pinterest.com", "facebook.com", "instagram.com", "twitter.com", "tiktok.com"}
+         for url in urls:
+             if url.lower().endswith(".pdf"):
+                 continue
+             try:
+                 domain = urlparse(url).netloc.replace("www.", "")
+                 if domain and domain not in seen_domains and domain not in discard_domains:
+                     filtered_urls.append(url)
+                     seen_domains.add(domain)
+             except Exception:
+                 continue
+         allowed_urls = check_robots_txt(filtered_urls[:num_results])  # limit to the desired number
+         print(f"Allowed URLs: {allowed_urls}")
+     except Exception as e:
+         print(f"❌ Search failed: {str(e)}")
+     return allowed_urls
+
+ def check_robots_txt(urls: List[str]) -> List[str]:
+     # Simplified: currently allows every URL through.
+     return urls
+
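+ # RobotFileParser is already imported above; a fuller check_robots_txt could
+ # look like this (hedged sketch, not wired in; it fails open when robots.txt
+ # cannot be fetched):
+ # def check_robots_txt(urls: List[str]) -> List[str]:
+ #     allowed = []
+ #     for url in urls:
+ #         parts = urlparse(url)
+ #         rp = RobotFileParser(f"{parts.scheme}://{parts.netloc}/robots.txt")
+ #         try:
+ #             rp.read()
+ #             if rp.can_fetch("*", url):
+ #                 allowed.append(url)
+ #         except Exception:
+ #             allowed.append(url)
+ #     return allowed
+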
+ async def crawl_webpages_simple(urls: List[str]) -> List[Dict]:
+     """Crawls pages; returns [{'url': url, 'markdown': markdown}]."""
+     if not CRAWL4AI_AVAILABLE or not urls:
+         return []
+     md_generator = DefaultMarkdownGenerator()
+     crawler_config = CrawlerRunConfig(
+         markdown_generator=md_generator,
+         excluded_tags=["script", "style", "nav", "footer", "aside"],
+         only_text=False,
+         cache_mode=CacheMode.NORMAL,
+         user_agent="Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+         page_timeout=20000,
+         wait_for_network_idle=True,
+         network_idle_timeout=3000,
+     )
+     browser_config = BrowserConfig(headless=True, text_mode=False, light_mode=True)
+     results_list = []
+     print(f"Crawling {len(urls)} URLs...")
+     try:
+         async with AsyncWebCrawler(config=browser_config) as crawler:
+             crawl_results = await crawler.arun_many(urls, config=crawler_config)
+             for res in crawl_results:
+                 markdown_content = res.markdown_v2.raw_markdown if (res and res.markdown_v2 and res.markdown_v2.raw_markdown) else ""
+                 if markdown_content.strip():
+                     results_list.append({'url': res.url, 'markdown': markdown_content.strip()})
+     except Exception as e:
+         print(f"Crawling error: {e}")
+     print(f"Crawled {len(results_list)} pages successfully.")
+     return results_list
+
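+ # Note: result.markdown_v2 and some of the CrawlerRunConfig options above are
+ # version-sensitive in crawl4ai (newer releases expose result.markdown
+ # instead), so pin the crawl4ai version this file was written against.
+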
+ # --- Gradio App Logic ---
+
+ # Load initial FAISS data.
+ faiss_index, faiss_metadata = create_or_load_faiss_index()
+ community_posts_global = load_community_posts()  # load community posts once
+
+ # Helper to format analysis for display
+ def format_analysis(analysis):
+     if not analysis or "error" in analysis or "raw_response" in analysis:
+         return f"Analysis Error or Incomplete:\n```json\n{json.dumps(analysis, indent=2)}\n```"
+     md = f"""
+ **Primary Emotion:** {analysis.get('primary_emotion', 'N/A')} (Intensity: {analysis.get('intensity', 'N/A')}/10)
+ **Triggers:** {', '.join(analysis.get('triggers', [])) or 'None'}
+ **Patterns:** {', '.join(analysis.get('patterns', [])) or 'None'}
+
+ **Growth Opportunities:**
+ """
+     for opp in analysis.get('growth_opportunities', []):
+         md += f"- {opp}\n"
+     md += "\n**Action Steps:**\n"
+     for step in analysis.get('action_steps', []):
+         md += f"- {step}\n"
+     return md
+
+ # Helper to format plan for display
+ def format_plan(plan):
+     if not plan or "error" in plan or "raw_response" in plan:
+         return f"Plan Error or Incomplete:\n```json\n{json.dumps(plan, indent=2)}\n```"
+     md = "**Short-term Actions:**\n"
+     for item in plan.get('short_term_actions', []):
+         md += f"- {item}\n"
+     md += "\n**Medium-term Practices:**\n"
+     for item in plan.get('medium_term_practices', []):
+         md += f"- {item}\n"
+     md += "\n**Long-term Changes:**\n"
+     for item in plan.get('long_term_changes', []):
+         md += f"- {item}\n"
+     md += "\n**Reflection Prompts:**\n"
+     for item in plan.get('reflection_prompts', []):
+         md += f"- {item}\n"
+     md += "\n**Success Metrics:**\n"
+     for item in plan.get('success_metrics', []):
+         md += f"- {item}\n"
+     return md
+
+ # Helper to format synthesis for display
+ def format_synthesis(synthesis):
+     if not synthesis or "error" in synthesis or "raw_response" in synthesis:
+         return f"Synthesis Error or Incomplete:\n```json\n{json.dumps(synthesis, indent=2)}\n```"
+     md = "**Key Insights:**\n"
+     for item in synthesis.get('key_insights', []):
+         md += f"- {item}\n"
+     md += "\n**Practical Exercises:**\n"
+     for item in synthesis.get('practical_exercises', []):
+         md += f"- {item}\n"
+     md += "\n**Recommended Readings:**\n"
+     for item in synthesis.get('recommended_readings', []):
+         md += f"- {item}\n"
+     md += f"\n**Expert Advice Summary:**\n{synthesis.get('expert_advice', 'N/A')}\n"
+     md += "\n**Action Plan:**\n"
+     for item in synthesis.get('action_plan', []):
+         md += f"- {item}\n"
+     md += "\n**Sources:**\n"
+     for item in synthesis.get('source_urls', []):
+         md += f"- {item}\n"
+     return md
+
+ # Helper to format community posts
+ def format_community_posts(posts):
+     if not posts:
+         return "No community posts yet."
+     md = ""
+     for post in sorted(posts, key=lambda x: x['timestamp'], reverse=True):
+         comments_md = ""
+         for c in sorted(post.get('comments', []), key=lambda x: x['timestamp']):
+             comments_md += f"  - **{c['user_id']}** ({c['timestamp'][:16]}): {c['comment']}\n"
+         md += f"""
+ ### {post['title']}
+ **By:** {post['user_id']} ({post['timestamp'][:16]}) | **Likes:** {post['likes']}
+ {post['content']}
+ **Comments ({len(post.get('comments', []))}):**
+ {comments_md or ' (No comments)'}
+ ---
+ """
+     return md
+
+
+ # --- Gradio Interface ---
+ with gr.Blocks(theme=gr.themes.Soft(), title="EmotionToAction") as demo:
+     # --- State Management ---
+     # User-specific data loaded from file.
+     user_data_state = gr.State({})
+     # FAISS index and metadata, loaded once at startup.
+     faiss_index_state = gr.State(faiss_index)
+     faiss_metadata_state = gr.State(faiss_metadata)
+     # Session state for the current analysis/plan context.
+     current_analysis_state = gr.State(None)
+     current_plan_state = gr.State(None)
+     current_emotion_id_state = gr.State(None)  # ID of the entry being viewed/processed
+     community_posts_state = gr.State(community_posts_global)  # global list loaded once
+
+     gr.Markdown("# 🌱 EmotionToAction (Gradio Version)")
+
+     with gr.Row():
+         username_input = gr.Textbox(label="Enter Username", placeholder="Type username and press Enter")
+         # Output for status messages.
+         status_output = gr.Markdown("")
+
+     # --- Main Tabs ---
+     with gr.Tabs() as tabs:
+         # --- Journal Tab ---
+         with gr.TabItem("📝 Journal", id=0):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     journal_entry_input = gr.Textbox(label="What are you feeling right now?", lines=10, placeholder="Describe your emotional experience...")
+                     analyze_button = gr.Button("Analyze Emotions", variant="primary")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### Past Entries")
+                     past_entries_display = gr.DataFrame(headers=["Date", "Emotion", "Entry Snippet", "Entry ID"], interactive=False, height=300)
+                     # Past entries are refreshed when a username is submitted or a new entry is analyzed.
+
+         # --- Analysis Tab ---
+         with gr.TabItem("🧠 Analysis", id=1):
+             gr.Markdown("### AI Emotion Analysis")
+             analysis_display = gr.Markdown("Analysis will appear here after submitting a journal entry.")
+             with gr.Row():
+                 plan_button = gr.Button("💡 Create Growth Plan")
+                 find_resources_button = gr.Button("🔎 Find & Add Resources")
+
+         # --- Plan Tab ---
+         with gr.TabItem("🚀 Growth Plan", id=2):
+             gr.Markdown("### Your Personalized Growth Plan")
+             plan_display = gr.Markdown("Plan will appear here after generation.")
+
+         # --- Resources Tab ---
+         with gr.TabItem("📚 Resources", id=3):
+             with gr.Accordion("Find New Resources (Adds to Index)", open=False):
+                 find_resources_status = gr.Markdown("Trigger resource finding from the 'Analysis' tab.")
+             with gr.Accordion("Synthesize Found Resources", open=True):
+                 synthesis_query_input = gr.Textbox(label="Describe the topic you want synthesized resources for (e.g., 'managing anxiety triggered by work')", placeholder="Uses info stored in resource index...")
+                 synthesize_button = gr.Button("Synthesize Resources", variant="secondary")
+                 synthesis_display = gr.Markdown("Synthesized insights will appear here.")
+             with gr.Accordion("Search Indexed Resources", open=True):
+                 search_query_input = gr.Textbox(label="Search indexed content", placeholder="Enter keywords...")
+                 search_button = gr.Button("Search Index", variant="secondary")
+                 search_results_display = gr.DataFrame(headers=["Text Snippet", "Source URL", "Score"], interactive=False, height=300)
+
+         # --- Community Tab ---
+         with gr.TabItem("👥 Community", id=4):
+             gr.Markdown("### Community Hub")
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown("#### Recent Posts")
+                     community_feed_display = gr.Markdown("Loading posts...")  # Markdown allows richer formatting
+                 with gr.Column(scale=1):
+                     gr.Markdown("#### New Post")
+                     post_title_input = gr.Textbox(label="Title")
+                     post_content_input = gr.Textbox(label="Content", lines=5)
+                     post_button = gr.Button("Submit Post", variant="primary")
+                     # Like/comment inputs could be added here (more complex).
+
+         # --- Profile Tab ---
+         with gr.TabItem("👤 Profile", id=5):
+             gr.Markdown("### Your Profile")
+             profile_points_display = gr.Number(label="Growth Points", interactive=False)
+             profile_joined_display = gr.Textbox(label="Member Since", interactive=False)
+             gr.Markdown("#### Growth Goals")
+             profile_goal1_input = gr.Textbox(label="Goal 1")
+             profile_goal2_input = gr.Textbox(label="Goal 2")
+             save_goals_button = gr.Button("Save Goals")
+
+     # --- Event Handlers ---
+
+     # Load user data when a username is entered.
+     def handle_username_change(username, user_data_s):
+         if not username:
+             # Must return one value per output component (7 in total).
+             return {"entries": [], "plans": {}, "resources": {}, "profile": {}}, "Please enter a username.", None, None, None, None, None
+         print(f"Loading data for user: {username}")
+         user_data = load_user_data(username)
+         # Ensure the profile exists.
+         if "profile" not in user_data:
+             user_data["profile"] = {"username": username, "joined": datetime.datetime.now().isoformat(), "points": 0, "goals": {}}
+         # Format past entries for the DataFrame display.
+         entry_list = user_data.get("entries", [])
+         df_data = [
+             [e['timestamp'][:10], e['analysis'].get('primary_emotion', 'N/A'), e['journal_entry'][:50] + '...', e['id']]
+             for e in sorted(entry_list, key=lambda x: x['timestamp'], reverse=True)
+         ]
+         past_entries_df = pd.DataFrame(df_data, columns=["Date", "Emotion", "Entry Snippet", "Entry ID"])
+
+         return user_data, f"Loaded data for {username}.", past_entries_df, \
+             user_data.get("profile", {}).get("points", 0), \
+             user_data.get("profile", {}).get("joined", ""), \
+             user_data.get("profile", {}).get("goals", {}).get("goal1", ""), \
+             user_data.get("profile", {}).get("goals", {}).get("goal2", "")
+
+     username_input.submit(
+         handle_username_change,
+         inputs=[username_input, user_data_state],
+         outputs=[user_data_state, status_output, past_entries_display,
+                  profile_points_display, profile_joined_display,
+                  profile_goal1_input, profile_goal2_input]
+     )
+
+     # Analyze button click (generator: yields incremental status updates).
+     def handle_analyze(username, user_data_s, journal_entry):
+         if not username:
+             # In a generator handler, early exits must yield; a plain
+             # `return value` would silently drop the UI update.
+             yield "Please enter username first.", None, None, None, None, None
+             return
+         if not journal_entry:
+             yield "Journal entry cannot be empty.", None, None, None, None, None
+             return
+
+         status = "Analyzing emotions..."
+         yield status, None, None, None, None, None  # update status immediately
+
+         analysis = analyze_emotion(journal_entry)
+
+         if "error" in analysis:
+             status = f"Analysis failed: {analysis['error']}"
+             formatted_analysis = f"Error:\n```json\n{json.dumps(analysis, indent=2)}\n```"
+             yield status, formatted_analysis, None, None, None, None
+         elif "raw_response" in analysis:
+             status = "Analysis complete (raw response)."
+             formatted_analysis = f"Raw Response:\n```\n{analysis['raw_response']}\n```"
+             # Usually cannot proceed further with a raw response.
+             yield status, formatted_analysis, None, None, None, None
+         else:
+             # Save the entry and update user data.
+             entry_id = str(uuid.uuid4())
+             new_entry = {
+                 'id': entry_id,
+                 'timestamp': datetime.datetime.now().isoformat(),
+                 'journal_entry': journal_entry,
+                 'analysis': analysis
+             }
+             user_data_s["entries"] = user_data_s.get("entries", []) + [new_entry]
+             user_data_s.setdefault("profile", {})["points"] = user_data_s.get("profile", {}).get("points", 0) + 10
+             save_user_data(username, user_data_s)
+
+             # Update the UI.
+             status = "Analysis complete!"
+             formatted_analysis = format_analysis(analysis)
+             # Refresh the past-entries table immediately.
+             entry_list = user_data_s.get("entries", [])
+             df_data = [[e['timestamp'][:10], e['analysis'].get('primary_emotion', 'N/A'), e['journal_entry'][:50] + '...', e['id']] for e in sorted(entry_list, key=lambda x: x['timestamp'], reverse=True)]
+             past_entries_df = pd.DataFrame(df_data, columns=["Date", "Emotion", "Entry Snippet", "Entry ID"])
+
+             # Yield updates for: status, analysis display, analysis state, emotion ID, user data state, past entries.
+             yield status, formatted_analysis, analysis, entry_id, user_data_s, past_entries_df
+
+     analyze_button.click(
+         handle_analyze,
+         inputs=[username_input, user_data_state, journal_entry_input],
+         outputs=[status_output, analysis_display, current_analysis_state, current_emotion_id_state, user_data_state, past_entries_display]
+     )
+
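+     # Gradio runs generator handlers as streams: each yield pushes one round of
+     # component updates, which is how the intermediate status messages above
+     # appear in the UI. Async generators (used further below) work the same way.
+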
+     # Create Plan button click (generator, same streaming pattern as above).
+     def handle_create_plan(username, user_data_s, current_analysis, current_emotion_id):
+         if not username:
+             yield "Please enter username.", None, None
+             return
+         if not current_analysis:
+             yield "Please analyze an entry first.", None, None
+             return
+         if not current_emotion_id:
+             yield "Internal error: Missing emotion ID.", None, None
+             return
+
+         status = "Generating growth plan..."
+         yield status, None, None  # update status immediately
+
+         user_goals = user_data_s.get("profile", {}).get("goals", {})
+         plan = generate_growth_plan(current_analysis, user_goals)
+
+         if "error" in plan or "raw_response" in plan:
+             status = "Failed to generate plan."
+             formatted_plan = f"Error/Raw:\n```json\n{json.dumps(plan, indent=2)}\n```"
+             yield status, formatted_plan, None
+         else:
+             # Save the plan and award points.
+             user_data_s["plans"] = user_data_s.get("plans", {})
+             user_data_s["plans"][current_emotion_id] = plan
+             user_data_s.setdefault("profile", {})["points"] = user_data_s.get("profile", {}).get("points", 0) + 20
+             save_user_data(username, user_data_s)
+             status = "Growth plan generated!"
+             formatted_plan = format_plan(plan)
+             yield status, formatted_plan, plan  # update status, display, and plan state
+
+     plan_button.click(
+         handle_create_plan,
+         inputs=[username_input, user_data_state, current_analysis_state, current_emotion_id_state],
+         outputs=[status_output, plan_display, current_plan_state]
+     )
+
+     # Find & Add Resources button click (async generator).
+     async def handle_find_resources(username, current_analysis, faiss_index_s, faiss_metadata_s, progress=gr.Progress(track_tqdm=True)):
+         # Async generators cannot `return` a value (it is a syntax error), so
+         # every early exit yields first and then returns bare.
+         if not username:
+             yield "Please enter username.", faiss_index_s, faiss_metadata_s, "Idle"
+             return
+         if not current_analysis:
+             yield "Please analyze an entry first.", faiss_index_s, faiss_metadata_s, "Idle"
+             return
+         if not CRAWL4AI_AVAILABLE:
+             yield "crawl4ai library not installed.", faiss_index_s, faiss_metadata_s, "Error"
+             return
+
+         status_msg = "Starting resource finding..."
+         yield status_msg, faiss_index_s, faiss_metadata_s, status_msg  # initial update
+
+         emotion = current_analysis.get('primary_emotion', 'challenge')
+         triggers = current_analysis.get('triggers', [])
+         search_term = f"{emotion} coping strategies {' '.join(triggers)}"
+
+         progress(0.1, desc="Searching web...")
+         status_msg = "Searching web..."
+         yield status_msg, faiss_index_s, faiss_metadata_s, status_msg
+         urls = get_web_urls(search_term, num_results=3)  # limit URLs
+         if not urls:
+             yield "No relevant URLs found.", faiss_index_s, faiss_metadata_s, "No URLs found."
+             return
+
+         progress(0.3, desc=f"Crawling {len(urls)} pages...")
+         status_msg = f"Crawling {len(urls)} pages..."
+         yield status_msg, faiss_index_s, faiss_metadata_s, status_msg
+         crawled_content = await crawl_webpages_simple(urls)  # async call
+         if not crawled_content:
+             yield "Crawling failed or yielded no content.", faiss_index_s, faiss_metadata_s, "Crawling failed."
+             return
+
+         progress(0.7, desc="Adding content to FAISS index...")
+         status_msg = "Adding content to index..."
+         yield status_msg, faiss_index_s, faiss_metadata_s, status_msg
+         # Note: add_to_faiss modifies the index/metadata objects in place.
+         index_obj = faiss_index_s  # current index from state
+         meta_obj = faiss_metadata_s  # current metadata from state
+         _, _, added_count = add_to_faiss(index_obj, meta_obj, crawled_content)
+
+         if added_count > 0:
+             # IMPORTANT: persist the modified index and metadata to disk.
+             save_faiss_index(index_obj, meta_obj)
+             status_msg = f"Successfully added {added_count} content chunks to the index."
+             yield status_msg, index_obj, meta_obj, status_msg  # return updated state objects
+         else:
+             status_msg = "Crawled content, but failed to add anything to the index."
+             yield status_msg, index_obj, meta_obj, status_msg
+
+     # Gradio handles async (generator) handlers natively.
+     find_resources_button.click(
+         handle_find_resources,
+         inputs=[username_input, current_analysis_state, faiss_index_state, faiss_metadata_state],
+         outputs=[status_output, faiss_index_state, faiss_metadata_state, find_resources_status]  # update index/meta state
+     )
+
+     # Synthesize button click (generator).
+     def handle_synthesize(username, user_data_s, current_emotion_id, faiss_index_s, faiss_metadata_s, query_override=""):
+         if not username:
+             yield "Please enter username.", None
+             return
+         # Prefer the current emotion context if available; otherwise use the override query.
+         search_text = ""
+         context_analysis = None
+         context_plan = None
+         if not query_override and current_emotion_id:
+             entry = next((e for e in user_data_s.get("entries", []) if e['id'] == current_emotion_id), None)
+             if entry and 'analysis' in entry:
+                 context_analysis = entry['analysis']
+                 context_plan = user_data_s.get("plans", {}).get(current_emotion_id)
+                 emotion = context_analysis.get('primary_emotion', 'issue')
+                 triggers = context_analysis.get('triggers', [])
+                 search_text = f"{emotion} coping techniques {' '.join(triggers)}"
+             else:
+                 query_override = "general emotional coping strategies"  # fallback if context is missing
+         elif not query_override:
+             query_override = "general emotional coping strategies"  # default if no context
+
+         if query_override:
+             search_text = query_override
+         if not search_text:
+             yield "Cannot determine search topic.", None
+             return
+
+         status = f"Searching index for '{search_text[:30]}...' and synthesizing..."
+         yield status, "Synthesizing..."  # update status
+
+         search_results = search_faiss(faiss_index_s, faiss_metadata_s, search_text, k=5)
+         if not search_results:
+             yield f"No relevant info found in index for '{search_text[:30]}...'", "No results found."
+             return
+
+         synthesis = synthesize_resources_llm(context_analysis or {}, context_plan, search_results)
+
+         if "error" in synthesis or "raw_response" in synthesis:
+             formatted_synthesis = f"Synthesis Error/Raw:\n```json\n{json.dumps(synthesis, indent=2)}\n```"
+             yield "Synthesis failed.", formatted_synthesis
+         else:
+             # Save the synthesis result against the emotion ID if context was used.
+             if context_analysis and current_emotion_id:
+                 user_data_s["resources"] = user_data_s.get("resources", {})
+                 user_data_s["resources"][current_emotion_id] = synthesis
+                 save_user_data(username, user_data_s)
+                 status = "Synthesis complete and saved!"
+             else:
+                 status = "Synthesis complete (not saved to a specific entry)."
+             formatted_synthesis = format_synthesis(synthesis)
+             yield status, formatted_synthesis
+
+     synthesize_button.click(
+         handle_synthesize,
+         inputs=[username_input, user_data_state, current_emotion_id_state, faiss_index_state, faiss_metadata_state, synthesis_query_input],
+         outputs=[status_output, synthesis_display]
+     )
+
+
+     # Search Index button click
+     def handle_search_index(faiss_index_s, faiss_metadata_s, query):
+         if not query:
+             return "Please enter a search query.", None
+         results = search_faiss(faiss_index_s, faiss_metadata_s, query, k=10)
+         if not results:
+             return "No results found.", None
+         # Format the results for the DataFrame.
+         df_data = [[res.get('text', '')[:150] + '...', res.get('url', 'N/A'), f"{res.get('score', 0):.2f}"] for res in results]
+         results_df = pd.DataFrame(df_data, columns=["Text Snippet", "Source URL", "Score"])
+         return f"Found {len(results)} results.", results_df
+
+     search_button.click(
+         handle_search_index,
+         inputs=[faiss_index_state, faiss_metadata_state, search_query_input],
+         outputs=[status_output, search_results_display]
+     )
+
+     # --- Community Handlers ---
+     def handle_new_post(username, title, content, community_posts_s):
+         if not username:
+             return "Enter username first.", community_posts_s, format_community_posts(community_posts_s)
+         if not title or not content:
+             return "Title and content required.", community_posts_s, format_community_posts(community_posts_s)
+
+         new_post = {'id': str(uuid.uuid4()), 'user_id': username, 'timestamp': datetime.datetime.now().isoformat(), 'title': title, 'content': content, 'likes': 0, 'comments': []}
+         community_posts_s.append(new_post)
+         save_community_posts(community_posts_s)  # persist the updated list
+         return "Post submitted.", community_posts_s, format_community_posts(community_posts_s)
+
+     post_button.click(
+         handle_new_post,
+         inputs=[username_input, post_title_input, post_content_input, community_posts_state],
+         outputs=[status_output, community_posts_state, community_feed_display]  # update state and display
+     )
+
+     # Initial load of the community posts display.
+     demo.load(lambda posts: format_community_posts(posts), inputs=community_posts_state, outputs=community_feed_display)
+
+     # --- Profile Handlers ---
+     def handle_save_goals(username, user_data_s, goal1, goal2):
+         if not username:
+             return "Enter username first.", user_data_s
+         user_data_s["profile"] = user_data_s.get("profile", {})
+         user_data_s["profile"]["goals"] = {"goal1": goal1, "goal2": goal2}
+         save_user_data(username, user_data_s)
+         return "Goals saved!", user_data_s
+
+     save_goals_button.click(
+         handle_save_goals,
+         inputs=[username_input, user_data_state, profile_goal1_input, profile_goal2_input],
+         outputs=[status_output, user_data_state]  # update user data state
+     )
+
+
+ # Launch the Gradio app.
+ if __name__ == "__main__":
+     demo.launch(debug=True)  # pass share=True to create a public link if needed