awacke1 commited on
Commit
e3a7a4b
·
verified ·
1 Parent(s): 8a22f0c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1269 -0
app.py ADDED
@@ -0,0 +1,1269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import openai
4
+ import base64
5
+ import cv2
6
+ import glob
7
+ import json
8
+ import math
9
+ import os
10
+ import pytz
11
+ import random
12
+ import re
13
+ import requests
14
+ #import textract
15
+ import time
16
+ import zipfile
17
+ import plotly.graph_objects as go
18
+ import streamlit.components.v1 as components
19
+ from datetime import datetime
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from bs4 import BeautifulSoup
22
+ from collections import defaultdict, deque, Counter
23
+ from dotenv import load_dotenv
24
+ from gradio_client import Client
25
+ from huggingface_hub import InferenceClient
26
+ from io import BytesIO
27
+ from PIL import Image
28
+ from PyPDF2 import PdfReader
29
+ from urllib.parse import quote
30
+ from xml.etree import ElementTree as ET
31
+ from openai import OpenAI
32
+ import extra_streamlit_components as stx
33
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
34
+ import asyncio
35
+ import edge_tts
36
+ from streamlit_marquee import streamlit_marquee
37
+ from typing import Tuple, Optional
38
+ import pandas as pd
39
+
40
+ # Patch the asyncio event loop to allow nested use of asyncio.run()
41
+ import nest_asyncio
42
+ nest_asyncio.apply()
43
+
44
# ─────────────────────────────────────────────────────────
# 1. CORE CONFIGURATION & SETUP
# ─────────────────────────────────────────────────────────

# Streamlit page chrome; set_page_config must be the first Streamlit call.
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲TalkingAIResearcher🏆"
    }
)
load_dotenv()  # pull API keys / config from a local .env file into os.environ

# ▶ Available English voices for Edge TTS
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural"
]

# ▶ Initialize Session State
# Each key is created only if absent so values survive Streamlit reruns.
if 'marquee_settings' not in st.session_state:
    # CSS-style keys consumed by streamlit_marquee.
    st.session_state['marquee_settings'] = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    }
if 'tts_voice' not in st.session_state:
    st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]  # default voice
if 'audio_format' not in st.session_state:
    st.session_state['audio_format'] = 'mp3'
if 'transcript_history' not in st.session_state:
    st.session_state['transcript_history'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
if 'openai_model' not in st.session_state:
    st.session_state['openai_model'] = "gpt-4o-2024-05-13"
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'last_voice_input' not in st.session_state:
    st.session_state['last_voice_input'] = ""
if 'editing_file' not in st.session_state:
    st.session_state['editing_file'] = None
if 'edit_new_name' not in st.session_state:
    st.session_state['edit_new_name'] = ""
if 'edit_new_content' not in st.session_state:
    st.session_state['edit_new_content'] = ""
if 'viewing_prefix' not in st.session_state:
    st.session_state['viewing_prefix'] = None
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False
if 'old_val' not in st.session_state:
    st.session_state['old_val'] = None
if 'last_query' not in st.session_state:
    st.session_state['last_query'] = ""
if 'marquee_content' not in st.session_state:
    st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"

# ▶ Additional keys for performance, caching, etc.
if 'audio_cache' not in st.session_state:
    st.session_state['audio_cache'] = {}  # TTS text-key -> generated audio filename
if 'download_link_cache' not in st.session_state:
    st.session_state['download_link_cache'] = {}  # file path -> base64 <a> link
if 'operation_timings' not in st.session_state:
    st.session_state['operation_timings'] = {}  # latest duration per operation
if 'performance_metrics' not in st.session_state:
    st.session_state['performance_metrics'] = defaultdict(list)  # full duration history
if 'enable_audio' not in st.session_state:
    st.session_state['enable_audio'] = True  # Turn TTS on/off

# ▶ API Keys — environment first, st.secrets overrides when present.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# ▶ Helper constants: emoji per downloadable file extension.
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊"
}
147
+
148
+ # ─────────────────────────────────────────────────────────
149
+ # 2. PERFORMANCE MONITORING & TIMING
150
+ # ─────────────────────────────────────────────────────────
151
+
152
class PerformanceTimer:
    """
    ⏱️ A context manager for timing operations with automatic logging.

    Usage:
        with PerformanceTimer("my_operation"):
            # do something

    The duration is stored into `st.session_state['operation_timings']`
    (latest run) and appended to `st.session_state['performance_metrics']`
    (full history per operation).
    """
    def __init__(self, operation_name: str):
        # Key under which this operation's timings are recorded.
        self.operation_name = operation_name
        self.start_time: Optional[float] = None  # set on __enter__

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only log when the body completed without raising, so failed
        # operations do not skew the metrics. Exceptions still propagate.
        if not exc_type:
            duration = time.time() - self.start_time
            st.session_state['operation_timings'][self.operation_name] = duration
            st.session_state['performance_metrics'][self.operation_name].append(duration)
174
+
175
def log_performance_metrics():
    """
    📈 Render performance metrics in the sidebar: a per-operation timing
    breakdown plus a small bar chart of average times.
    """
    st.sidebar.markdown("### ⏱️ Performance Metrics")

    metrics = st.session_state['operation_timings']
    if metrics:
        total_time = sum(metrics.values())
        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

        # Per-operation share of the total.
        for op_name, secs in metrics.items():
            share = (secs / total_time) * 100
            st.sidebar.write(f"**{op_name}:** {secs:.2f}s ({share:.1f}%)")

        # Average duration per operation across all recorded runs.
        history_data = [
            {"Operation": op_name, "Avg Time (s)": sum(runs) / len(runs)}
            for op_name, runs in st.session_state['performance_metrics'].items()
            if runs
        ]
        if history_data:
            st.sidebar.markdown("### 📊 Timing History (Avg)")
            st.sidebar.bar_chart(pd.DataFrame(history_data).set_index("Operation"))
203
+
204
+ # ─────────────────────────────────────────────────────────
205
+ # 3. HELPER FUNCTIONS (FILENAMES, LINKS, MARQUEE, ETC.)
206
+ # ─────────────────────────────────────────────────────────
207
+
208
def get_central_time():
    """🌎 Return the current time as a tz-aware datetime in US Central."""
    return datetime.now(pytz.timezone('US/Central'))
212
+
213
def format_timestamp_prefix():
    """📅 Return a sortable YYYYMMDD_HHMMSS timestamp string (US Central)."""
    return get_central_time().strftime("%Y%m%d_%H%M%S")
217
+
218
def initialize_marquee_settings():
    """🌈 Ensure marquee style defaults exist in session state."""
    defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
    # setdefault leaves any user-customized settings untouched.
    st.session_state.setdefault('marquee_settings', defaults)
229
+
230
def get_marquee_settings():
    """🔧 Return the marquee style dict, creating defaults on first access."""
    initialize_marquee_settings()
    settings = st.session_state['marquee_settings']
    return settings
234
+
235
def update_marquee_settings_ui():
    """🖌 Add color pickers & sliders for marquee config in the sidebar."""
    st.sidebar.markdown("### 🎯 Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        # Explicit widget keys keep the pickers stable across reruns.
        bg_color = st.color_picker("🎨 Background",
                                   st.session_state['marquee_settings']["background"],
                                   key="bg_color_picker")
        text_color = st.color_picker("✍️ Text",
                                     st.session_state['marquee_settings']["color"],
                                     key="text_color_picker")
    with cols[1]:
        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed (secs)", 1, 20, 20, key="duration_slider")

    # Persist choices; CSS-style keys are consumed by streamlit_marquee.
    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })
256
+
257
def display_marquee(text, settings, key_suffix=""):
    """
    🎉 Render a styled marquee banner for *text*.
    Text longer than 280 chars is truncated with an ellipsis to avoid overflow.
    """
    if len(text) > 280:
        content = text[:280] + "..."
    else:
        content = text
    streamlit_marquee(
        content=content,
        **settings,
        key=f"marquee_{key_suffix}"
    )
    st.write("")  # spacer below the banner
269
+
270
def get_high_info_terms(text: str, top_n=10) -> list:
    """
    📌 Return the top_n most frequent unigrams and bigrams from *text*
    (lowercased), excluding a small stopword list. Useful for deriving
    short descriptive keywords from Q/A content.
    """
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    # Candidate terms: every word plus every adjacent word pair.
    candidates = words + [f"{a} {b}" for a, b in zip(words, words[1:])]
    counts = Counter(
        term for term in candidates
        if term not in stop_words and len(term.split()) <= 2
    )
    return [term for term, _ in counts.most_common(top_n)]
282
+
283
def clean_text_for_filename(text: str) -> str:
    """
    🏷️ Sanitize *text* into a filename-safe token string: lowercase,
    strip special characters, drop short/unhelpful words, join with
    underscores, and cap at 200 characters.
    """
    stop_short = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'])
    sanitized = re.sub(r'[^\w\s-]', '', text.lower())
    kept = [w for w in sanitized.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(kept)[:200]
294
+
295
def generate_filename(prompt, response, file_type="md", max_length=200):
    """
    📁 Build a short descriptive filename from prompt + response:
      1) extract top info terms from the combined text,
      2) add a cleaned snippet of both strings,
      3) drop duplicate parts (order preserved),
      4) append word counts (wct/sw) and an estimated read duration,
      5) truncate so the whole name fits in max_length.
    """
    prefix = format_timestamp_prefix() + "_"
    combined_text = (prompt + " " + response)[:200]
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet_cleaned = clean_text_for_filename((prompt[:40] + " " + response[:40]).strip())

    # De-duplicate while preserving insertion order.
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))

    # Word counts for prompt (wct) and response (sw), plus an estimated
    # read-aloud duration assuming ~2.5 words per second.
    wct = len(prompt.split())
    sw = len(response.split())
    estimated_duration = round((wct + sw) / 2.5)

    base_name = '_'.join(unique_parts).strip('_')
    extra_tokens = f"_wct{wct}_sw{sw}_dur{estimated_duration}"
    # Budget for the descriptive part after prefix + "." + extension.
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(base_name) + len(extra_tokens) > leftover_chars:
        base_name = base_name[:leftover_chars - len(extra_tokens)]
    return f"{prefix}{base_name}{extra_tokens}.{file_type}"
334
+
335
def create_file(prompt, response, file_type="md"):
    """
    📝 Write prompt + response to a generated, sanitized filename.
    Returns the created filename.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as handle:
        handle.write(f"{prompt}\n\n{response}")
    return filename
344
+
345
def get_download_link(file, file_type="zip"):
    """
    ⬇️ Return an HTML anchor that downloads *file* inline via a base64
    data URI. MIME type and label emoji depend on file_type; unknown
    types fall back to application/octet-stream.
    """
    with open(file, "rb") as fh:
        b64 = base64.b64encode(fh.read()).decode()
    name = os.path.basename(file)

    # file_type -> (MIME type, label prefix)
    known = {
        "zip": ("application/zip", "📂 "),
        "mp3": ("audio/mpeg", "🎵 "),
        "wav": ("audio/wav", "🔊 "),
        "md": ("text/markdown", "📝 "),
    }
    mime, emoji = known.get(file_type, ("application/octet-stream", ""))
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{emoji}Download {name}</a>'
361
+
362
def clean_for_speech(text: str) -> str:
    """Clean up text for TTS output: strip newlines, stray </s> tokens,
    markdown '#' characters, and parenthesized URLs; collapse whitespace.

    NOTE(review): a second `clean_for_speech` defined later in this file
    overrides this one at import time.
    """
    for old, new in (("\n", " "), ("</s>", " "), ("#", "")):
        text = text.replace(old, new)
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", text).strip()
370
+
371
+ # ─────────────────────────────────────────────────────────
372
+ # 5 MINUTE RESEARCH PAPER FEATURE (NEW CODE) 🚀📚
373
# ─────────────────────────────────────────────────────────
374
+
375
def generate_pdf_link(url: str) -> str:
    """
    🔗 Derive an arXiv PDF URL from an abstract URL by swapping "abs" for
    "pdf" and appending ".pdf" if missing. URLs without "abs" are returned
    unchanged.

    Fix: replace only the FIRST occurrence of "abs" (count=1). The original
    replaced every occurrence, which could corrupt URLs that happen to
    contain "abs" elsewhere (e.g. in a query string or paper slug).
    """
    if "abs" in url:
        pdf_url = url.replace("abs", "pdf", 1)
        if not pdf_url.endswith(".pdf"):
            pdf_url += ".pdf"
        return pdf_url
    return url
385
+
386
def generate_5min_feature_markdown(paper: dict) -> str:
    """
    ✨ Generate detailed markdown for a paper including:
      - Word counts for title and summary
      - High info words list (up to 15 terms)
      - PDF link (derived from the abstract URL)
      - A pseudo ROUGE score
      - A mermaid graph code block chaining the key concepts
    """
    # Missing fields default to empty strings so a partial paper dict still renders.
    title = paper.get('title', '')
    summary = paper.get('summary', '')
    authors = paper.get('authors', '')
    date = paper.get('date', '')
    url = paper.get('url', '')
    pdf_link = generate_pdf_link(url)
    title_wc = len(title.split())
    summary_wc = len(summary.split())
    high_info_terms = get_high_info_terms(summary, top_n=15)
    terms_str = ", ".join(high_info_terms)
    # Pseudo ROUGE: percentage of high-info terms relative to summary length.
    # max(..., 1) guards against division by zero for empty summaries.
    rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)

    # Mermaid flowchart linking the key terms sequentially: T1 --> T2 --> ...
    mermaid_code = "```mermaid\nflowchart TD\n"
    for i in range(len(high_info_terms) - 1):
        mermaid_code += f'    T{i+1}["{high_info_terms[i]}"] --> T{i+2}["{high_info_terms[i+1]}"]\n'
    mermaid_code += "```"

    md = f"""
## 📄 {title}

**Authors:** {authors}
**Date:** {date}
**Word Count (Title):** {title_wc} | **Word Count (Summary):** {summary_wc}

**Links:** [Abstract]({url}) | [PDF]({pdf_link})

**High Info Terms:** {terms_str}
**ROUGE Score:** {rouge_score}%

### 🎤 TTF Read Aloud
- **Title:** {title}
- **Key Terms:** {terms_str}
- **ROUGE:** {rouge_score}%

#### Mermaid Graph of Key Concepts
{mermaid_code}

---
"""
    return md
437
+
438
def create_detailed_paper_md(papers: list) -> str:
    """
    📝 Build one markdown document containing the 5-minute research-paper
    summary for every paper in *papers*.

    Fix: dropped the unused `enumerate` index (`idx`) from the loop and
    used a generator with extend instead of per-iteration append.
    """
    md_parts = ["# Detailed Research Paper Summary\n"]
    md_parts.extend(generate_5min_feature_markdown(paper) for paper in papers)
    return "\n".join(md_parts)
446
+
447
+ # ─────────────────────────────────────────────────────────
448
+ # 4. OPTIMIZED AUDIO GENERATION (ASYNC TTS + CACHING)
449
+ # ─────────────────────────────────────────────────────────
450
+
451
def clean_for_speech(text: str) -> str:
    """
    🔉 Clean up text for TTS output with enhanced cleaning.
    Removes markdown formatting, code blocks, links, URLs, and stray tokens.

    NOTE(review): this definition overrides the earlier, simpler
    `clean_for_speech` defined above in this file.
    """
    with PerformanceTimer("text_cleaning"):
        # Remove markdown headers (leading '#' runs)
        text = re.sub(r'#+ ', '', text)
        # Remove link formats [text](url), keeping just the link text
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        # Remove emphasis markers (*, _, ~, `)
        text = re.sub(r'[*_~`]', '', text)
        # Remove code blocks (fenced, then inline)
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)
        # Collapse whitespace and drop newlines
        text = re.sub(r'\s+', ' ', text).replace("\n", " ")
        # Remove stray end-of-sequence tokens from model output
        text = text.replace("</s>", " ")
        # Remove URLs (bare, then parenthesized)
        text = re.sub(r'https?://\S+', '', text)
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        text = text.strip()
        return text
475
+
476
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """
    🎶 Asynchronous TTS generation with caching and performance tracking.

    Returns (filename, generation_time). Returns (None, 0) for empty input
    or on synthesis failure, and (cached_filename, 0) on a cache hit.
    """
    with PerformanceTimer("tts_generation") as timer:
        # ▶ Clean & validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # ▶ Check cache (avoid regenerating the same TTS).
        # NOTE(review): the key uses only the first 100 chars of the text,
        # so two long texts sharing a prefix could collide — confirm intended.
        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
        if cache_key in st.session_state['audio_cache']:
            return st.session_state['audio_cache'][cache_key], 0

        try:
            # ▶ Generate audio; edge-tts expects signed "+N%" / "+NHz" strings
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # ▶ Unique output filename: timestamp + random suffix
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            # ▶ Save audio file
            await communicate.save(filename)

            # ▶ Store in cache
            st.session_state['audio_cache'][cache_key] = filename

            # ▶ Return path + elapsed time since the timer started
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"❌ Error generating audio: {str(e)}")
            return None, 0
520
+
521
+ # NEW: Define speak_with_edge_tts using our async function and return only the filename
522
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper around async_edge_tts_generate.

    Runs the coroutine to completion (nest_asyncio permits nested loops)
    and returns only the generated filename.
    """
    result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
    return result[0] if isinstance(result, tuple) else result
528
+
529
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """
    📝 Asynchronously save Q&A to markdown, then generate audio if enabled.
    Returns (md_file, audio_file, md_time, audio_time); audio_file is None
    when TTS is globally disabled.
    """
    # Fall back to the session's currently selected voice.
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save") as timer:
        # ▶ Save Q/A as markdown, timing just the file write
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        # ▶ Generate audio only when globally enabled
        audio_file = None
        audio_time = 0
        if st.session_state['enable_audio']:
            audio_text = f"{question}\n\nAnswer: {answer}"
            audio_file, audio_time = await async_edge_tts_generate(
                audio_text,
                voice=voice,
                file_format=st.session_state['audio_format']
            )

        return md_file, audio_file, md_time, audio_time
558
+
559
def save_qa_with_audio(question, answer, voice=None):
    """
    Save Q&A to a markdown file and synthesize an audio narration.

    Returns (md_file, audio_file).

    Fix: removed the dead local `combined_text`, which was built but
    never used (create_file formats its own content).
    """
    # Fall back to the session's currently selected voice.
    if not voice:
        voice = st.session_state['tts_voice']

    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file
573
+
574
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """
    ⬇️ Create a base64 download link for a file, with per-session caching
    and error handling. Returns "" on failure.

    Fix: the anchor tags previously hard-coded the literal text "(unknown)"
    as both the download attribute and the visible label; they now use the
    file's actual basename (the `filename` local was computed but unused).
    """
    with PerformanceTimer("download_link_generation"):
        cache_key = f"dl_{file_path}"
        # Serve a previously built link for this exact path when available.
        if cache_key in st.session_state['download_link_cache']:
            return st.session_state['download_link_cache'][cache_key]

        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            filename = os.path.basename(file_path)

            if file_type == "mp3":
                link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
            elif file_type == "wav":
                link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
            elif file_type == "md":
                link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
            else:
                link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'

            st.session_state['download_link_cache'][cache_key] = link
            return link

        except Exception as e:
            st.error(f"❌ Error creating download link: {str(e)}")
            return ""
603
+
604
+ # NEW: Define play_and_download_audio to play audio and provide a download link.
605
def play_and_download_audio(file_path, file_type="mp3"):
    """Embed an audio player plus a download link for an existing audio file.

    Silently does nothing when file_path is falsy, not a string, or missing
    on disk.
    """
    playable = file_path and isinstance(file_path, str) and os.path.exists(file_path)
    if not playable:
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
611
+
612
+ # ─────────────────────────────────────────────────────────
613
+ # 5. RESEARCH / ARXIV FUNCTIONS
614
+ # ─────────────────────────────────────────────────────────
615
+
616
def parse_arxiv_refs(ref_text: str):
    """
    📜 Given a multi-line markdown with Arxiv references,
    parse them into a list of dicts: {date, title, url, authors, summary}.

    Format assumed: a line with exactly two '|' separators starts a new
    paper ("date | title | ..."); the first subsequent line is the authors,
    and any further lines accumulate into the summary. At most 20 papers
    are returned.
    """
    if not ref_text:
        return []
    results = []
    current_paper = {}
    lines = ref_text.split('\n')

    for i, line in enumerate(lines):
        if line.count('|') == 2:
            # Found a new paper header line — flush the previous paper first.
            if current_paper:
                results.append(current_paper)
                if len(results) >= 20:
                    break
            try:
                header_parts = line.strip('* ').split('|')
                date = header_parts[0].strip()
                title = header_parts[1].strip()
                url_match = re.search(r'(https://arxiv.org/\S+)', line)
                # Fall back to a synthetic id when no arXiv URL is present.
                url = url_match.group(1) if url_match else f"paper_{len(results)}"

                current_paper = {
                    'date': date,
                    'title': title,
                    'url': url,
                    'authors': '',
                    'summary': '',
                    'full_audio': None,     # filled later by create_paper_audio_files
                    'download_base64': '',  # filled later by create_paper_audio_files
                }
            except Exception as e:
                # Malformed header: warn and skip until the next header line.
                st.warning(f"⚠️ Error parsing paper header: {str(e)}")
                current_paper = {}
                continue
        elif current_paper:
            # First non-header line is authors; later lines extend the summary.
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                if current_paper['summary']:
                    current_paper['summary'] += ' ' + line.strip()
                else:
                    current_paper['summary'] = line.strip()

    # Flush the final in-progress paper.
    if current_paper:
        results.append(current_paper)

    return results[:20]
668
+
669
def create_paper_links_md(papers):
    """🔗 Build a minimal markdown list of numbered links to each paper's arXiv page."""
    entries = [
        f"{n}. **{p['title']}** — [Arxiv Link]({p['url']})"
        for n, p in enumerate(papers, start=1)
    ]
    return "\n".join(["# Paper Links\n"] + entries)
677
+
678
async def create_paper_audio_files(papers, input_question):
    """
    🎧 For each paper, synthesize a TTS summary into `paper['full_audio']`
    and store a base64 download link in `paper['download_base64']`.

    A failure on one paper logs a warning and clears that paper's audio
    fields; remaining papers continue processing.

    NOTE(review): `input_question` is not referenced in this body — confirm
    whether it was meant to influence the narration.
    """
    for paper in papers:
        try:
            # Narration: "title by authors. summary", cleaned for speech.
            audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            audio_text = clean_for_speech(audio_text)
            file_format = st.session_state['audio_format']
            audio_file, _ = await async_edge_tts_generate(
                audio_text,
                voice=st.session_state['tts_voice'],
                file_format=file_format
            )
            paper['full_audio'] = audio_file

            if audio_file:
                # Convert to a base64 download link (cached per path).
                ext = file_format
                download_link = create_download_link_with_cache(audio_file, file_type=ext)
                paper['download_base64'] = download_link

        except Exception as e:
            st.warning(f"⚠️ Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
705
+
706
def display_papers(papers, marquee_settings):
    """
    📑 Render papers in the main area: a marquee banner per paper plus an
    expander containing links, authors, summary, the 5-minute feature
    markdown, and the paper's audio (when generated).
    """
    st.write("## 🔎 Research Papers")
    for i, paper in enumerate(papers, start=1):
        # Scrolling banner with a truncated preview of the paper.
        marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}"
        display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")

        with st.expander(f"{i}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** — [Arxiv Link]({paper['url']})")
            # PDF link derived from the abstract URL.
            pdf_link = generate_pdf_link(paper['url'])
            st.markdown(f"**PDF Link:** [PDF]({pdf_link})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            # Detailed 5-minute feature markdown for this paper.
            st.markdown(generate_5min_feature_markdown(paper))
            if paper.get('full_audio'):
                st.write("📚 **Paper Audio**")
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)
729
+
730
def display_papers_in_sidebar(papers):
    """
    🔎 Mirror the paper listing in the sidebar: one expander per paper with
    links, audio player, download link, authors, a truncated summary, and
    the 5-minute feature markdown.
    """
    st.sidebar.title("🎶 Papers & Audio")
    for i, paper in enumerate(papers, start=1):
        with st.sidebar.expander(f"{i}. {paper['title']}"):
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            # PDF link derived from the abstract URL.
            pdf_link = generate_pdf_link(paper['url'])
            st.markdown(f"**PDF:** [PDF]({pdf_link})")
            if paper['full_audio']:
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            if paper['summary']:
                # Truncate long summaries for the narrow sidebar.
                st.markdown(f"**Summary:** {paper['summary'][:300]}...")
            # 5-minute feature summary for this paper.
            st.markdown(generate_5min_feature_markdown(paper))
750
+
751
+ # ─────────────────────────────────────────────────────────
752
+ # 6. ZIP FUNCTION
753
+ # ─────────────────────────────────────────────────────────
754
+
755
def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """
    📦 Zip up all relevant files, generating a short name from high-info terms.
    Returns the zip filename if created, else None.
    """
    # Exclude README.md from the archive.
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    # Gather text to derive descriptive terms for the zip name:
    # markdown bodies plus audio basenames (underscores -> spaces).
    all_content = []
    for f in all_files:
        if f.endswith('.md'):
            with open(f, "r", encoding='utf-8') as file:
                all_content.append(file.read())
        elif f.endswith('.mp3') or f.endswith('.wav'):
            basename = os.path.splitext(os.path.basename(f))[0]
            words = basename.replace('_', ' ')
            all_content.append(words)

    all_content.append(input_question)
    combined_content = " ".join(all_content)
    info_terms = get_high_info_terms(combined_content, top_n=10)

    timestamp = format_timestamp_prefix()
    name_text = '-'.join(term for term in info_terms[:5])
    # NOTE(review): the [:20] truncation applies to timestamp+name together,
    # and the timestamp alone is 15 chars, so only ~4 chars of the terms
    # survive — confirm whether a longer cap was intended.
    short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as z:
        for f in all_files:
            z.write(f)
    return short_zip_name
787
+
788
# ─────────────────────────────────────────────────────────
# 7. MAIN AI LOGIC: LOOKUP & TAB HANDLERS
# ─────────────────────────────────────────────────────────

def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline.

    Pipeline:
      1) Send the raw question to Claude, render and voice the reply.
      2) (if ``useArxiv``) fortify the query with Claude's answer and run an
         ArXiv RAG search through a remote Gradio Space.
      3) Parse the returned references into papers; render link lists,
         detailed markdown, optional per-paper audio, and sidebar entries.
      4) Ask Claude again with the references to sketch an app.py answer.

    Args:
        q: user question string.
        vocal_summary / extended_refs / titles_summary / full_audio: kept for
            interface compatibility with callers; not used by this
            implementation (see perform_ai_lookup_old).
        useArxiv: when True, run steps 2–4 above.
        useArxivAudio: when True, also synthesize audio for each paper.

    Returns:
        The final markdown result string — Claude's answer, or the
        question + references when the ArXiv path ran.
    """
    start = time.time()
    # NOTE(review): assembled but never passed to the model calls below —
    # presumably intended as a system prompt; confirm before removing.
    ai_constitution = """
    You are a medical and machine learning review board expert and streamlit python and html5 expert. You are tasked with creating a streamlit app.py and requirements.txt for a solution that answers the questions with a working app to demonstrate. You are to use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.
    """

    # --- 1) Claude API: first pass on the raw question.
    client = anthropic.Anthropic(api_key=anthropic_key)
    user_input = q
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": user_input}
        ])
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    # Save & produce audio for the first-pass answer.
    result = response.content[0].text
    create_file(q, result)
    md_file, audio_file = save_qa_with_audio(q, result)
    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    if useArxiv:
        q = q + result  # Feed Arxiv the question and Claude's answer for prompt fortification to get better answers and references
        # --- 2) Arxiv RAG via a remote Gradio Space.
        st.write('Running Arxiv RAG with Claude inputs.')
        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = client.predict(
            q,
            10,
            "Semantic Search",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            api_name="/update_with_rag_md"
        )[0]

        result = f"🔎 {q}\n\n{refs}"  # use original question q with result paired with paper references for best prompt fortification

        md_file, audio_file = save_qa_with_audio(q, result)
        st.subheader("📝 Main Response Audio")
        play_and_download_audio(audio_file, st.session_state['audio_format'])

        # --- 3) Parse + handle papers returned by the RAG step.
        papers = parse_arxiv_refs(refs)
        if papers:
            # Create minimal links page first
            paper_links = create_paper_links_md(papers)
            links_file = create_file(q, paper_links, "md")
            st.markdown(paper_links)

            # NEW: Create detailed markdown with 5 minute research paper features
            detailed_md = create_detailed_paper_md(papers)
            detailed_file = create_file(q, detailed_md, "md")
            st.markdown(detailed_md)

            # Then create audio for each paper if desired (TTS is slow, so
            # it is opt-in via useArxivAudio).
            if useArxivAudio:
                asyncio.run(create_paper_audio_files(papers, input_question=q))

            display_papers(papers, get_marquee_settings())  # scrolling marquee per paper and summary
            display_papers_in_sidebar(papers)  # sidebar entry per paper and summary
        else:
            st.warning("No papers found in the response.")

        # --- 4) Claude API with arxiv list of papers to app.py
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q + '\n\n' + 'Use the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with python libraries for creating a single app.py application that answers the questions with working code to demonstrate.'+ '\n\n'
        response = client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[
                {"role": "user", "content": user_input}
            ])
        r2 = response.content[0].text
        st.write("Claude's reply 🧠:")
        st.markdown(r2)

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
877
def perform_ai_lookup_old(
    q,
    vocal_summary=True,
    extended_refs=False,
    titles_summary=True,
    full_audio=False
):
    """
    🔮 Legacy lookup path: a single Anthropic (Claude-2) completion call.

    Sends the raw question to Claude, renders the reply, appends the
    exchange to st.session_state.chat_history, and returns the reply text.
    The extra flags exist for signature compatibility and are unused here.
    """
    with PerformanceTimer("ai_lookup"):
        t0 = time.time()

        # Single legacy-completions-API call; prompt engineering is minimal
        # on purpose — this path only demonstrates the call shape.
        anthropic_client = anthropic.Anthropic(api_key=anthropic_key)
        prompt_text = f"{anthropic.HUMAN_PROMPT} {q}{anthropic.AI_PROMPT}"
        completion = anthropic_client.completions.create(
            model="claude-2",
            max_tokens_to_sample=512,
            prompt=prompt_text
        )
        reply = completion.completion.strip()

        # Render the reply and record the exchange for the chat-history view.
        st.write("### Claude's reply 🧠:")
        st.markdown(reply)
        st.session_state.chat_history.append({"user": q, "claude": reply})

        # Report wall-clock time and hand the text back to the caller.
        elapsed = time.time() - t0
        st.write(f"**Elapsed:** {elapsed:.2f}s")

        return reply
918
async def process_voice_input(text):
    """
    🎤 Handle a voice/text query: run the AI lookup, then persist the
    resulting Q&A as markdown + audio and render download links.
    """
    # Guard clause: nothing to do for an empty query.
    if not text:
        return

    st.subheader("🔍 Search Results")

    # Run the full AI pipeline on the query.
    answer = perform_ai_lookup(
        text,
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True
    )

    # Persist the Q&A as markdown and audio; the async helper reports timings.
    md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(text, answer)

    st.subheader("📝 Generated Files")
    st.write(f"**Markdown:** {md_file} (saved in {md_time:.2f}s)")
    if audio_file:
        st.write(f"**Audio:** {audio_file} (generated in {audio_time:.2f}s)")
        st.audio(audio_file)
        download_markup = create_download_link_with_cache(
            audio_file, file_type=st.session_state['audio_format']
        )
        st.markdown(download_markup, unsafe_allow_html=True)
947
def display_voice_tab():
    """
    🎙️ Display the voice input tab with TTS settings and real-time usage.

    Renders voice/format pickers in the sidebar, a message box, a send
    button that runs the async lookup, and the accumulated chat history.
    """

    # ▶ Voice Settings
    st.sidebar.markdown("### 🎤 Voice Settings")
    caption_female = 'Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    caption_male = 'Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'

    # Optionally, replace with your own local image or comment out.
    # Narrowed from a bare `except:` — that form also swallows SystemExit,
    # KeyboardInterrupt and Streamlit's own control-flow exceptions.
    try:
        st.sidebar.image('Group Picture - Voices.png', caption=caption_female + ' | ' + caption_male)
    except Exception:
        st.sidebar.write('.')

    selected_voice = st.sidebar.selectbox(
        "👄 Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )

    st.sidebar.markdown("""
    # 🎙️ Voice Character Agent Selector 🎭
    *Female Voices*:
    - 🌸 **Aria** – Elegant, creative storytelling
    - 🎶 **Jenny** – Friendly, conversational
    - 🌺 **Sonia** – Bold, confident
    - 🌌 **Natasha** – Sophisticated, mysterious
    - 🌷 **Clara** – Cheerful, empathetic

    *Male Voices*:
    - 🌟 **Guy** – Authoritative, versatile
    - 🛠️ **Ryan** – Approachable, casual
    - 🎻 **William** – Classic, scholarly
    - 🌟 **Liam** – Energetic, engaging
    """)

    # ▶ Audio Format
    st.markdown("### 🔊 Audio Format")
    selected_format = st.radio(
        "Choose Audio Format:",
        options=["MP3", "WAV"],
        index=0
    )

    # ▶ Update session state if changed; st.rerun() restarts the script so
    # downstream widgets immediately see the new settings.
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # ▶ Text Input (newlines flattened to keep TTS input on one line)
    user_text = st.text_area("💬 Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # ▶ Send Button
    if st.button("📨 Send"):
        # Run our process_voice_input as an async function
        asyncio.run(process_voice_input(user_text))

    # ▶ Chat History
    st.subheader("📜 Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])
1016
def display_file_history_in_sidebar():
    """
    📂 Shows a history of local .md, .mp3, .wav files (newest first),
    grouped by their timestamp prefix, with previews, a full-content view,
    and download links. Also offers delete-all and zip-all actions.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📂 File History")

    # ▶ Gather all files
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # ▶ Sort newest first
    all_files = sorted(all_files, key=os.path.getmtime, reverse=True)

    # Group files by their query prefix: the first six underscore-separated
    # tokens of the filename (presumably the timestamp_query prefix produced
    # by the save helpers — TODO confirm against create_file's naming).
    grouped_files = {}
    for f in all_files:
        fname = os.path.basename(f)
        prefix = '_'.join(fname.split('_')[:6])  # Get timestamp part
        if prefix not in grouped_files:
            grouped_files[prefix] = {'md': [], 'audio': [], 'loaded': False}

        ext = os.path.splitext(fname)[1].lower()
        if ext == '.md':
            grouped_files[prefix]['md'].append(f)
        elif ext in ['.mp3', '.wav']:
            grouped_files[prefix]['audio'].append(f)

    # Sort groups by timestamp (newest first)
    sorted_groups = sorted(grouped_files.items(), key=lambda x: x[0], reverse=True)

    # 🗑⬇️ Sidebar delete-all and zip-all download
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for f in all_files:
                os.remove(f)
            # st.rerun() interrupts the script run, so nothing after it
            # executes — the old `should_rerun = True` line here was dead code.
            st.rerun()
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Display grouped files
    for prefix, files in sorted_groups:
        # Preview from the first MD file. Read one extra char so we can tell
        # whether the file continues past 200 chars — the old check
        # `len(preview) > 200` could never be true after `f.read(200)`,
        # so the ellipsis was never appended.
        preview = ""
        if files['md']:
            with open(files['md'][0], "r", encoding="utf-8") as f:
                raw = f.read(201)
            preview = raw[:200].replace("\n", " ")
            if len(raw) > 200:
                preview += "..."

        # Create unique key for this group
        group_key = f"group_{prefix}"
        if group_key not in st.session_state:
            st.session_state[group_key] = False

        # Display group expander
        with st.sidebar.expander(f"📑 Query Group: {prefix}"):
            st.write("**Preview:**")
            st.write(preview)

            # Load full content button
            if st.button("📖 View Full Content", key=f"btn_{prefix}"):
                st.session_state[group_key] = True

            # Only show full content and audio if button was clicked
            if st.session_state[group_key]:
                # Display markdown files
                for md_file in files['md']:
                    with open(md_file, "r", encoding="utf-8") as f:
                        content = f.read()
                    st.markdown("**Full Content:**")
                    st.markdown(content)
                    st.markdown(get_download_link(md_file, file_type="md"),
                                unsafe_allow_html=True)

                # Display audio files (disabled by default to keep the
                # sidebar light; flip to True to inline players here)
                usePlaySidebar = False
                if usePlaySidebar:
                    for audio_file in files['audio']:
                        ext = os.path.splitext(audio_file)[1].replace('.', '')
                        st.audio(audio_file)
                        st.markdown(get_download_link(audio_file, file_type=ext),
                                    unsafe_allow_html=True)
1113
def main():
    """Top-level Streamlit entry point: marquee banner, action-tab dispatch,
    custom input component, sidebar file history, CSS, and deferred rerun."""
    # ▶ 1) Setup marquee UI in the sidebar
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # ▶ 2) Display the marquee welcome (banner-size font overrides)
    display_marquee(
        st.session_state['marquee_content'],
        {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
        key_suffix="welcome"
    )

    # ▶ 3) Main action tabs and model use choices
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
                        horizontal=True)

    useArxiv = st.checkbox("Search Arxiv for Research Paper Answers", value=True)
    useArxivAudio = st.checkbox("Generate Audio File for Research Paper Answers", value=False)

    # ▶ 4) Show or hide custom component (optional example)
    # NOTE(review): expects a local "mycomponent" directory with the
    # component frontend — confirm it ships alongside app.py.
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello from MyComponent")

    if val:
        # The component may return literal "\n" escape sequences; flatten them.
        val_stripped = val.replace('\\n', ' ')
        edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
        # NOTE(review): run_option is currently not used below — confirm intent.
        run_option = st.selectbox("Model:", ["Arxiv", "Other (demo)"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("⚙ AutoRun", value=True)
        with col2:
            full_audio = st.checkbox("📚FullAudio", value=False)

        # Only auto-run when the component value actually changed since last run.
        input_changed = (val != st.session_state.old_val)

        if autorun and input_changed:
            st.session_state.old_val = val
            st.session_state.last_query = edited_input
            perform_ai_lookup(edited_input,
                              vocal_summary=True,
                              extended_refs=False,
                              titles_summary=True,
                              full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
        else:
            if st.button("▶ Run"):
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                perform_ai_lookup(edited_input,
                                  vocal_summary=True,
                                  extended_refs=False,
                                  titles_summary=True,
                                  full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)

    # ─────────────────────────────────────────────────────────
    # TAB: ArXiv — free-text query plus audio/reference options
    # ─────────────────────────────────────────────────────────
    if tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:", key="arxiv_query")

        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
        extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
        titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
        full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
        full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")

        if q and st.button("🔍Run"):
            st.session_state.last_query = q
            result = perform_ai_lookup(q,
                                       vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                # Persist the full transcript to a markdown file on request.
                create_file(q, result, "md")

    # ─────────────────────────────────────────────────────────
    # TAB: Voice — TTS settings + message box (see display_voice_tab)
    # ─────────────────────────────────────────────────────────
    elif tab_main == "🎤 Voice":
        display_voice_tab()

    # ─────────────────────────────────────────────────────────
    # TAB: Media — gallery of local audio / image / video files
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📸 Media":
        st.header("📸 Media Gallery")
        tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])

        # ▶ AUDIO sub-tab: player + download link per file
        with tabs[0]:
            st.subheader("🎵 Audio Files")
            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
            if audio_files:
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        st.audio(a)
                        ext = os.path.splitext(a)[1].replace('.', '')
                        dl_link = get_download_link(a, file_type=ext)
                        st.markdown(dl_link, unsafe_allow_html=True)
            else:
                st.write("No audio files found.")

        # ▶ IMAGES sub-tab: user-adjustable column grid
        with tabs[1]:
            st.subheader("🖼 Image Files")
            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3, key="cols_images")
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
            else:
                st.write("No images found.")

        # ▶ VIDEO sub-tab: one expander per video
        with tabs[2]:
            st.subheader("🎥 Video Files")
            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
            if vids:
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)
            else:
                st.write("No videos found.")

    # ─────────────────────────────────────────────────────────
    # TAB: Editor — placeholder only
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📝 Editor":
        st.write("### 📝 File Editor (Minimal Demo)")
        st.write("Select or create a file to edit. More advanced features can be added as needed.")

    # ─────────────────────────────────────────────────────────
    # SIDEBAR: FILE HISTORY + PERFORMANCE METRICS
    # ─────────────────────────────────────────────────────────
    display_file_history_in_sidebar()
    log_performance_metrics()

    # ▶ Some light CSS styling
    st.markdown("""
    <style>
        .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
        .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
        .stButton>button { margin-right: 0.5rem; }
    </style>
    """, unsafe_allow_html=True)

    # ▶ Deferred rerun: honour a flag other code may set during this run.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
1268
# Standard script entry point: run the Streamlit app when executed directly.
if __name__ == "__main__":
    main()