awacke1 committed on
Commit
4352211
·
verified ·
1 Parent(s): 1284757

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1265 -0
app.py ADDED
@@ -0,0 +1,1265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import openai
4
+ import base64
5
+ import cv2
6
+ import glob
7
+ import json
8
+ import math
9
+ import os
10
+ import pytz
11
+ import random
12
+ import re
13
+ import requests
14
+ #import textract
15
+ import time
16
+ import zipfile
17
+ import plotly.graph_objects as go
18
+ import streamlit.components.v1 as components
19
+ from datetime import datetime
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from bs4 import BeautifulSoup
22
+ from collections import defaultdict, deque, Counter
23
+ from dotenv import load_dotenv
24
+ from gradio_client import Client
25
+ from huggingface_hub import InferenceClient
26
+ from io import BytesIO
27
+ from PIL import Image
28
+ from PyPDF2 import PdfReader
29
+ from urllib.parse import quote
30
+ from xml.etree import ElementTree as ET
31
+ from openai import OpenAI
32
+ import extra_streamlit_components as stx
33
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
34
+ import asyncio
35
+ import edge_tts
36
+ from streamlit_marquee import streamlit_marquee
37
+ from typing import Tuple, Optional
38
+ import pandas as pd
39
+
40
# ─────────────────────────────────────────────────────────
# 1. CORE CONFIGURATION & SETUP
# ─────────────────────────────────────────────────────────

st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲TalkingAIResearcher🏆"
    }
)
load_dotenv()

# ▶ Available English voices for Edge TTS
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural"
]

# ▶ Initialize Session State.
# One defaults table instead of a ladder of `if key not in ...` checks.
# Each key is only written when missing, so Streamlit reruns keep user state.
_SESSION_DEFAULTS = {
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    },
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
    # ▶ Additional keys for performance, caching, etc.
    'audio_cache': {},
    'download_link_cache': {},
    'operation_timings': {},
    'performance_metrics': defaultdict(list),
    'enable_audio': True,  # Turn TTS on/off
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# ▶ API Keys: environment first, then Streamlit secrets override when present.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# ▶ Helper constants: emoji used when listing files by extension
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊"
}
143
+
144
+ # ─────────────────────────────────────────────────────────
145
+ # 2. PERFORMANCE MONITORING & TIMING
146
+ # ─────────────────────────────────────────────────────────
147
+
148
class PerformanceTimer:
    """
    ⏱️ Context manager that times an operation and records the duration.

    Usage:
        with PerformanceTimer("my_operation"):
            ...  # timed work

    On a clean exit the elapsed seconds are written to
    st.session_state['operation_timings'][name] and appended to the
    st.session_state['performance_metrics'][name] history list.
    Nothing is recorded when the body raises.
    """

    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time: Optional[float] = None  # set on __enter__

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            return  # an exception occurred — skip logging
        elapsed = time.time() - self.start_time
        st.session_state['operation_timings'][self.operation_name] = elapsed
        st.session_state['performance_metrics'][self.operation_name].append(elapsed)
170
+
171
def log_performance_metrics():
    """
    📈 Render performance metrics in the sidebar: total time, a per-operation
    breakdown with percentages, and a bar chart of average times.

    NOTE(review): original indentation was lost in transit; the history-chart
    section is reconstructed at function level since it reads a different
    store ('performance_metrics') than the breakdown ('operation_timings') —
    confirm against the canonical source.
    """
    st.sidebar.markdown("### ⏱️ Performance Metrics")

    timings = st.session_state['operation_timings']
    if timings:
        total_time = sum(timings.values())
        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
        # Per-operation breakdown as a share of the total
        for operation, duration in timings.items():
            percentage = (duration / total_time) * 100
            st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

    # Average time per operation across the recorded history
    history_data = [
        {"Operation": op, "Avg Time (s)": sum(times) / len(times)}
        for op, times in st.session_state['performance_metrics'].items()
        if times
    ]
    if history_data:
        st.sidebar.markdown("### 📊 Timing History (Avg)")
        st.sidebar.bar_chart(pd.DataFrame(history_data).set_index("Operation"))
199
+
200
+ # ─────────────────────────────────────────────────────────
201
+ # 3. HELPER FUNCTIONS (FILENAMES, LINKS, MARQUEE, ETC.)
202
+ # ─────────────────────────────────────────────────────────
203
+
204
def get_central_time():
    """🌎 Return the current timezone-aware datetime in US Central time."""
    return datetime.now(pytz.timezone('US/Central'))
208
+
209
def format_timestamp_prefix():
    """📅 Return a filename-safe timestamp prefix (YYYYmmdd_HHMMSS, US Central)."""
    return get_central_time().strftime("%Y%m%d_%H%M%S")
213
+
214
def initialize_marquee_settings():
    """🌈 Ensure the marquee display defaults exist in session state."""
    if 'marquee_settings' not in st.session_state:
        defaults = {
            "background": "#1E1E1E",
            "color": "#FFFFFF",
            "font-size": "14px",
            "animationDuration": "20s",
            "width": "100%",
            "lineHeight": "35px"
        }
        st.session_state['marquee_settings'] = defaults
225
+
226
def get_marquee_settings():
    """🔧 Return the marquee settings dict, creating defaults first if absent."""
    initialize_marquee_settings()
    return st.session_state['marquee_settings']
230
+
231
def update_marquee_settings_ui():
    """🖌 Sidebar widgets (color pickers + sliders) that edit the marquee config in place."""
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left, right = st.sidebar.columns(2)
    with left:
        bg_color = st.color_picker("🎨 Background",
                                   st.session_state['marquee_settings']["background"],
                                   key="bg_color_picker")
        text_color = st.color_picker("✍️ Text",
                                     st.session_state['marquee_settings']["color"],
                                     key="text_color_picker")
    with right:
        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed (secs)", 1, 20, 20, key="duration_slider")

    # Persist the widget values back into the shared settings dict
    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })
252
+
253
def display_marquee(text, settings, key_suffix=""):
    """
    🎉 Render a styled marquee for `text` using the given settings dict.
    Text longer than 280 chars is truncated with an ellipsis to avoid overflow.
    """
    shown = text if len(text) <= 280 else text[:280] + "..."
    streamlit_marquee(
        content=shown,
        **settings,
        key=f"marquee_{key_suffix}"
    )
    st.write("")
265
+
266
def get_high_info_terms(text: str, top_n=10) -> list:
    """
    📌 Return the `top_n` most frequent words and bigrams from `text`,
    excluding a small stopword list. Order follows Counter.most_common
    (frequency, then first appearance), so results are deterministic.
    """
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    # Candidate terms: unigrams first, then adjacent-pair bigrams (order matters for ties)
    candidates = words + [' '.join(pair) for pair in zip(words, words[1:])]
    freq = Counter(
        term for term in candidates
        if term not in stop_words and len(term.split()) <= 2
    )
    return [term for term, _ in freq.most_common(top_n)]
278
+
279
def clean_text_for_filename(text: str) -> str:
    """
    🏷️ Sanitize text into a filename fragment: lowercase, strip special
    characters, drop short/low-value words, join with underscores,
    capped at 200 characters.
    """
    lowered = re.sub(r'[^\w\s-]', '', text.lower())
    skip = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'])
    kept = [w for w in lowered.split() if len(w) > 3 and w not in skip]
    return '_'.join(kept)[:200]
290
+
291
def generate_filename(prompt, response, file_type="md", max_length=200):
    """
    📁 Build a descriptive filename from prompt + response:
    timestamp prefix, top info terms, a cleaned snippet, then
    `_wct{N}_sw{N}_dur{N}` tokens (prompt/response word counts and an
    estimated read duration at 2.5 words/sec), truncated to `max_length`.
    """
    prefix = format_timestamp_prefix() + "_"

    combined_text = (prompt + " " + response)[:200]
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet = (prompt[:40] + " " + response[:40]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # Order-preserving de-duplication of the name parts
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))

    # Word counts for title (prompt) and summary (response), plus estimated
    # audio duration assuming a reading speed of 2.5 words per second.
    wct = len(prompt.split())
    sw = len(response.split())
    estimated_duration = round((wct + sw) / 2.5)

    base_name = '_'.join(unique_parts).strip('_')
    extra_tokens = f"_wct{wct}_sw{sw}_dur{estimated_duration}"

    # Truncate the descriptive part so prefix + name + extension fit max_length
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(base_name) + len(extra_tokens) > leftover_chars:
        base_name = base_name[:leftover_chars - len(extra_tokens)]

    return f"{prefix}{base_name}{extra_tokens}.{file_type}"
330
+
331
def create_file(prompt, response, file_type="md"):
    """
    📝 Write `prompt` and `response` (blank-line separated) to a new file
    named via generate_filename(). Returns the created filename.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{prompt}\n\n{response}")
    return filename
340
+
341
def get_download_link(file, file_type="zip"):
    """
    Return an HTML <a> tag that embeds `file` as a base64 data URI,
    with a MIME type and label emoji chosen by `file_type`.
    """
    with open(file, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode()
    basename = os.path.basename(file)
    # (mime type, link label prefix) per supported extension
    kinds = {
        "zip": ("application/zip", "📂 Download "),
        "mp3": ("audio/mpeg", "🎵 Download "),
        "wav": ("audio/wav", "🔊 Download "),
        "md": ("text/markdown", "📝 Download "),
    }
    mime, label = kinds.get(file_type, ("application/octet-stream", "Download "))
    return f'<a href="data:{mime};base64,{payload}" download="{basename}">{label}{basename}</a>'
357
+
358
def clean_for_speech(text: str) -> str:
    """Clean up text for TTS output.

    NOTE(review): this definition is shadowed by the enhanced
    clean_for_speech defined later in the file (section 4), so only the
    later one is effective at runtime; kept here unchanged for history.
    """
    text = text.replace("\n", " ").replace("</s>", " ").replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)  # drop "(http...)" link targets
    return re.sub(r"\s+", " ", text).strip()
366
+
367
+ # ─────────────────────────────────────────────────────────
368
+ # 5 MINUTE RESEARCH PAPER FEATURE (NEW CODE) 🚀📚
369
+ # ─────────────────────────────────────────────────────────
370
+
371
def generate_pdf_link(url: str) -> str:
    """
    🔗 Derive a PDF URL from an arXiv abstract URL: swap 'abs' for 'pdf'
    and append '.pdf' when missing. URLs without 'abs' pass through unchanged.
    (Note: every occurrence of 'abs' in the URL is replaced.)
    """
    if "abs" not in url:
        return url
    pdf_url = url.replace("abs", "pdf")
    return pdf_url if pdf_url.endswith(".pdf") else pdf_url + ".pdf"
381
+
382
def generate_5min_feature_markdown(paper: dict) -> str:
    """
    ✨ Build the detailed "5-minute research paper" markdown for one paper:
    title/summary word counts, up to 15 high-info terms, abstract + PDF
    links, a pseudo ROUGE score, and a mermaid flowchart chaining the
    key concepts in sequence.
    """
    title = paper.get('title', '')
    summary = paper.get('summary', '')
    authors = paper.get('authors', '')
    date = paper.get('date', '')
    url = paper.get('url', '')

    pdf_link = generate_pdf_link(url)
    title_wc = len(title.split())
    summary_wc = len(summary.split())
    high_info_terms = get_high_info_terms(summary, top_n=15)
    terms_str = ", ".join(high_info_terms)
    # Pseudo ROUGE: high-info term count as a percentage of summary length
    rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)

    # Mermaid flowchart: T1["term1"] --> T2["term2"] --> ...
    edges = [
        f' T{i+1}["{high_info_terms[i]}"] --> T{i+2}["{high_info_terms[i+1]}"]\n'
        for i in range(len(high_info_terms) - 1)
    ]
    mermaid_code = "```mermaid\nflowchart TD\n" + "".join(edges) + "```"

    md = f"""
## 📄 {title}

**Authors:** {authors}
**Date:** {date}
**Word Count (Title):** {title_wc} | **Word Count (Summary):** {summary_wc}

**Links:** [Abstract]({url}) | [PDF]({pdf_link})

**High Info Terms:** {terms_str}
**ROUGE Score:** {rouge_score}%

### 🎤 TTF Read Aloud
- **Title:** {title}
- **Key Terms:** {terms_str}
- **ROUGE:** {rouge_score}%

#### Mermaid Graph of Key Concepts
{mermaid_code}

---
"""
    return md
433
+
434
def create_detailed_paper_md(papers: list) -> str:
    """
    📝 Concatenate the detailed "5-minute research paper" markdown for
    every paper in `papers` under a single summary heading.

    Fix: the original loop used `enumerate` but never used the index;
    rewritten as a straightforward comprehension-style extend.
    """
    md_parts = ["# Detailed Research Paper Summary\n"]
    md_parts.extend(generate_5min_feature_markdown(paper) for paper in papers)
    return "\n".join(md_parts)
442
+
443
+ # ─────────────────────────────────────────────────────────
444
+ # 4. OPTIMIZED AUDIO GENERATION (ASYNC TTS + CACHING)
445
+ # ─────────────────────────────────────────────────────────
446
+
447
def clean_for_speech(text: str) -> str:
    """
    🔉 Normalize text for TTS: strips markdown headers, link syntax,
    emphasis markers, code blocks, URLs and stray sentinel tokens,
    then collapses whitespace. Timed under the "text_cleaning" metric.
    """
    with PerformanceTimer("text_cleaning"):
        text = re.sub(r'#+ ', '', text)                        # markdown headers
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # [label](url) -> label
        text = re.sub(r'[*_~`]', '', text)                     # emphasis markers
        text = re.sub(r'```[\s\S]*?```', '', text)             # fenced code blocks
        text = re.sub(r'`[^`]*`', '', text)                    # inline code
        text = re.sub(r'\s+', ' ', text).replace("\n", " ")    # collapse whitespace
        text = text.replace("</s>", " ")                       # hidden sentinel tokens
        text = re.sub(r'https?://\S+', '', text)               # bare URLs
        text = re.sub(r'\(https?://[^\)]+\)', '', text)        # parenthesized URLs
        return text.strip()
471
+
472
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """
    🎶 Generate TTS audio asynchronously with caching and timing.

    Returns (filename, generation_seconds); (None, 0) for empty input or
    on error, (cached_filename, 0) on a cache hit keyed by the first 100
    chars of the cleaned text plus the voice/rate/pitch/format.
    """
    with PerformanceTimer("tts_generation") as timer:
        # Clean & validate the input text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # Cache lookup — avoid regenerating identical requests
        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
        if cache_key in st.session_state['audio_cache']:
            return st.session_state['audio_cache'][cache_key], 0

        try:
            # edge-tts expects signed-percent rate and signed-Hz pitch strings
            communicate = edge_tts.Communicate(
                text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz"
            )

            # Unique output name: timestamp + random suffix
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{stamp}_{random.randint(1000, 9999)}.{file_format}"

            await communicate.save(filename)

            st.session_state['audio_cache'][cache_key] = filename
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"❌ Error generating audio: {str(e)}")
            return None, 0
516
+
517
+ # NEW: Define speak_with_edge_tts using our async function and return only the filename
518
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper around async_edge_tts_generate; returns only the filename."""
    outcome = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
    return outcome[0] if isinstance(outcome, tuple) else outcome
524
+
525
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """
    📝 Save a Q&A pair to markdown, then (if audio is enabled) generate a
    TTS rendition. Returns (md_file, audio_file, md_seconds, audio_seconds);
    audio_file is None when TTS is disabled or generation failed.
    """
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save"):
        # Markdown first, timed separately from the audio step
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        audio_file: Optional[str] = None
        audio_time: float = 0
        if st.session_state['enable_audio']:
            audio_file, audio_time = await async_edge_tts_generate(
                f"{question}\n\nAnswer: {answer}",
                voice=voice,
                file_format=st.session_state['audio_format']
            )

        return md_file, audio_file, md_time, audio_time
554
+
555
def save_qa_with_audio(question, answer, voice=None):
    """
    Save a Q&A pair to markdown and generate its TTS audio (synchronous
    variant of async_save_qa_with_audio). Returns (md_file, audio_file).

    Fix: removed the dead local `combined_text`, which was built but
    never used — create_file() receives question/answer directly.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file
569
+
570
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """
    ⬇️ Build (and cache) a base64 download link for `file_path`.

    Fix: the anchor's `download` attribute and label previously contained a
    literal placeholder instead of the file's basename, even though
    `filename` was computed and then never used — the links now embed the
    actual basename. Returns "" on error.
    """
    with PerformanceTimer("download_link_generation"):
        cache_key = f"dl_{file_path}"
        if cache_key in st.session_state['download_link_cache']:
            return st.session_state['download_link_cache'][cache_key]

        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            filename = os.path.basename(file_path)

            if file_type == "mp3":
                link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
            elif file_type == "wav":
                link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
            elif file_type == "md":
                link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
            else:
                link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'

            st.session_state['download_link_cache'][cache_key] = link
            return link

        except Exception as e:
            st.error(f"❌ Error creating download link: {str(e)}")
            return ""
599
+
600
+ # NEW: Define play_and_download_audio to play audio and provide a download link.
601
def play_and_download_audio(file_path, file_type="mp3"):
    """Embed a Streamlit audio player plus a download link for an existing file."""
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
607
+
608
+ # ─────────────────────────────────────────────────────────
609
+ # 5. RESEARCH / ARXIV FUNCTIONS
610
+ # ─────────────────────────────────────────────────────────
611
+
612
def parse_arxiv_refs(ref_text: str):
    """
    📜 Parse multi-line markdown of arXiv references into dicts:
    {date, title, url, authors, summary, full_audio, download_base64}.

    A header line contains exactly two '|' separators (date | title | link);
    the first following line becomes the authors, every later line is folded
    into the summary. At most 20 papers are returned.
    """
    if not ref_text:
        return []

    results = []
    current_paper = {}

    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            # New paper header — flush the previous one first
            if current_paper:
                results.append(current_paper)
                if len(results) >= 20:
                    break
            try:
                header_parts = line.strip('* ').split('|')
                url_match = re.search(r'(https://arxiv.org/\S+)', line)
                current_paper = {
                    'date': header_parts[0].strip(),
                    'title': header_parts[1].strip(),
                    'url': url_match.group(1) if url_match else f"paper_{len(results)}",
                    'authors': '',
                    'summary': '',
                    'full_audio': None,
                    'download_base64': '',
                }
            except Exception as e:
                st.warning(f"⚠️ Error parsing paper header: {str(e)}")
                current_paper = {}
                continue
        elif current_paper:
            # First continuation line is authors; later ones extend the summary
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            elif current_paper['summary']:
                current_paper['summary'] += ' ' + line.strip()
            else:
                current_paper['summary'] = line.strip()

    if current_paper:
        results.append(current_paper)

    return results[:20]
664
+
665
def create_paper_links_md(papers):
    """🔗 Build a minimal markdown document of numbered arXiv links."""
    lines = ["# Paper Links\n"]
    lines += [
        f"{n}. **{p['title']}** — [Arxiv Link]({p['url']})"
        for n, p in enumerate(papers, start=1)
    ]
    return "\n".join(lines)
673
+
674
async def create_paper_audio_files(papers, input_question):
    """
    🎧 For each paper dict, synthesize a TTS summary into
    paper['full_audio'] and a base64 download link into
    paper['download_base64']. Failures set (None, '') for that paper.

    NOTE(review): `input_question` is accepted but not read here —
    kept for caller compatibility.
    """
    for paper in papers:
        try:
            narration = clean_for_speech(
                f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            )
            fmt = st.session_state['audio_format']
            audio_file, _ = await async_edge_tts_generate(
                narration,
                voice=st.session_state['tts_voice'],
                file_format=fmt
            )
            paper['full_audio'] = audio_file

            if audio_file:
                paper['download_base64'] = create_download_link_with_cache(
                    audio_file, file_type=fmt
                )

        except Exception as e:
            st.warning(f"⚠️ Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
701
+
702
def display_papers(papers, marquee_settings):
    """
    📑 Render the paper list in the main area: one marquee ticker plus an
    expanded panel (links, authors, summary, 5-min feature, audio) per paper.
    """
    st.write("## 🔎 Research Papers")
    for idx, paper in enumerate(papers, start=1):
        ticker = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}"
        display_marquee(ticker, marquee_settings, key_suffix=f"paper_{idx}")

        with st.expander(f"{idx}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** — [Arxiv Link]({paper['url']})")
            # PDF link shown next to the abstract link
            st.markdown(f"**PDF Link:** [PDF]({generate_pdf_link(paper['url'])})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            # Detailed 5-minute-feature markdown for this paper
            st.markdown(generate_5min_feature_markdown(paper))
            if paper.get('full_audio'):
                st.write("📚 **Paper Audio**")
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)
725
+
726
def display_papers_in_sidebar(papers):
    """
    🔎 Sidebar mirror of the paper list: one expander per paper with
    links, audio player/download, authors, truncated summary, and the
    5-minute-feature markdown.
    """
    st.sidebar.title("🎶 Papers & Audio")
    for idx, paper in enumerate(papers, start=1):
        with st.sidebar.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            st.markdown(f"**PDF:** [PDF]({generate_pdf_link(paper['url'])})")
            if paper['full_audio']:
                st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            if paper['summary']:
                st.markdown(f"**Summary:** {paper['summary'][:300]}...")
            st.markdown(generate_5min_feature_markdown(paper))
746
+
747
+ # ─────────────────────────────────────────────────────────
748
+ # 6. ZIP FUNCTION
749
+ # ─────────────────────────────────────────────────────────
750
+
751
def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """
    📦 Zip all given files (README.md excluded), naming the archive from
    high-info terms drawn from the files' content plus the input question.
    Returns the zip filename, or None when there is nothing to pack.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    # Gather text to mine for naming terms: md bodies + audio basenames
    text_bits = []
    for path in all_files:
        if path.endswith('.md'):
            with open(path, "r", encoding='utf-8') as fh:
                text_bits.append(fh.read())
        elif path.endswith(('.mp3', '.wav')):
            stem = os.path.splitext(os.path.basename(path))[0]
            text_bits.append(stem.replace('_', ' '))
    text_bits.append(input_question)

    info_terms = get_high_info_terms(" ".join(text_bits), top_n=10)
    stamp = format_timestamp_prefix()
    name_text = '-'.join(info_terms[:5])
    # Keep the archive name short: 20 chars of prefix+terms plus extension
    short_zip_name = (stamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as archive:
        for path in all_files:
            archive.write(path)
    return short_zip_name
783
+
784
+ # ─────────────────────────────────────────────────────────
785
+ # 7. MAIN AI LOGIC: LOOKUP & TAB HANDLERS
786
+ # ─────────────────────────────────────────────────────────
787
+
788
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline.

    Flow: (1) ask Claude directly, (2) optionally run an ArXiv RAG search
    seeded with Claude's answer, (3) render/save the returned papers, and
    (4) ask Claude again to turn the papers into a demo streamlit app.

    Args:
        q: the user's question.
        vocal_summary / extended_refs / titles_summary / full_audio:
            accepted for interface compatibility; not referenced in this body.
        useArxiv: when True, run the ArXiv RAG stages (2-4).
        useArxivAudio: when True, also synthesize audio for each paper.

    Returns:
        The final text result (Claude's answer, fortified with references
        when useArxiv is True).
    """
    start = time.time()
    # NOTE(review): defined but never referenced below — presumably intended
    # as a system prompt for the Claude calls; confirm intended usage.
    ai_constitution = """
    You are a medical and machine learning review board expert and streamlit python and html5 expert. You are tasked with creating a streamlit app.py and requirements.txt for a solution that answers the questions with a working app to demonstrate. You are to use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.
    """

    # --- 1) Claude API: direct answer to the question.
    client = anthropic.Anthropic(api_key=anthropic_key)
    user_input = q
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": user_input}
        ])
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    # Save & produce audio for the direct answer.
    result = response.content[0].text
    create_file(q, result)
    md_file, audio_file = save_qa_with_audio(q, result)
    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    if useArxiv:
        q = q + result  # Feed Arxiv the question and Claude's answer for prompt fortification to get better answers and references
        # --- 2) Arxiv RAG over the fortified query.
        st.write('Running Arxiv RAG with Claude inputs.')
        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = client.predict(
            q,
            10,
            "Semantic Search",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            api_name="/update_with_rag_md"
        )[0]

        result = f"🔎 {q}\n\n{refs}"  # use original question q with result paired with paper references for best prompt fortification

        # Re-save the fortified Q&A so the audio covers question + references.
        md_file, audio_file = save_qa_with_audio(q, result)
        st.subheader("📝 Main Response Audio")
        play_and_download_audio(audio_file, st.session_state['audio_format'])

        # --- 3) Parse + handle papers returned by the RAG stage.
        papers = parse_arxiv_refs(refs)
        if papers:
            # Create minimal links page first
            paper_links = create_paper_links_md(papers)
            links_file = create_file(q, paper_links, "md")
            st.markdown(paper_links)

            # NEW: Create detailed markdown with 5 minute research paper features
            detailed_md = create_detailed_paper_md(papers)
            detailed_file = create_file(q, detailed_md, "md")
            st.markdown(detailed_md)

            # Then create audio for each paper if desired
            if useArxivAudio:
                asyncio.run(create_paper_audio_files(papers, input_question=q))

            display_papers(papers, get_marquee_settings())  # scrolling marquee per paper and summary
            display_papers_in_sidebar(papers)  # sidebar entry per paper and summary
        else:
            st.warning("No papers found in the response.")

        # --- 4) Claude API again: ask for an app.py + requirements.txt built
        # from the fortified question (which now embeds the paper references).
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q + '\n\n' + 'Use the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with python libraries for creating a single app.py application that answers the questions with working code to demonstrate.'+ '\n\n'
        response = client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[
                {"role": "user", "content": user_input}
            ])
        r2 = response.content[0].text
        st.write("Claude's reply 🧠:")
        st.markdown(r2)

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
+
873
def perform_ai_lookup_old(
    q,
    vocal_summary=True,
    extended_refs=False,
    titles_summary=True,
    full_audio=False
):
    """
    🔮 Main routine that uses Anthropic (Claude) + optional Gradio ArXiv RAG pipeline.
    Currently demonstrates calling Anthropic and returning the text.

    Legacy variant kept for reference; superseded by perform_ai_lookup().

    Args:
        q: the user's question.
        vocal_summary / extended_refs / titles_summary / full_audio:
            accepted for interface compatibility; not referenced in this body.

    Returns:
        Claude's reply text (also appended to st.session_state.chat_history).
    """
    with PerformanceTimer("ai_lookup"):
        start = time.time()

        # ▶ Example call to Anthropic (Claude)
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q

        # Here we do a minimal prompt, just to show the call
        # (You can enhance your prompt engineering as needed)
        # NOTE(review): completions.create / HUMAN_PROMPT is the legacy
        # Anthropic text-Completions surface (claude-2) — confirm the
        # installed SDK version still exposes it before relying on this path.
        response = client.completions.create(
            model="claude-2",
            max_tokens_to_sample=512,
            prompt=f"{anthropic.HUMAN_PROMPT} {user_input}{anthropic.AI_PROMPT}"
        )

        result_text = response.completion.strip()

        # ▶ Print and store
        st.write("### Claude's reply 🧠:")
        st.markdown(result_text)

        # ▶ We'll add to the chat history
        st.session_state.chat_history.append({"user": q, "claude": result_text})

        # ▶ Return final text
        end = time.time()
        st.write(f"**Elapsed:** {end - start:.2f}s")

        return result_text
+
914
async def process_voice_input(text):
    """
    🎤 Handle a voice query: run the AI lookup, then persist the Q/A
    as markdown plus synthesized audio, and offer the audio for download.
    """
    if not text:
        return
    st.subheader("🔍 Search Results")

    # Run the main AI pipeline on the transcribed text.
    answer = perform_ai_lookup(
        text,
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True
    )

    # Persist the Q&A pair (markdown + audio) asynchronously.
    md_path, audio_path, md_secs, audio_secs = await async_save_qa_with_audio(text, answer)

    st.subheader("📝 Generated Files")
    st.write(f"**Markdown:** {md_path} (saved in {md_secs:.2f}s)")
    if audio_path:
        st.write(f"**Audio:** {audio_path} (generated in {audio_secs:.2f}s)")
        st.audio(audio_path)
        link = create_download_link_with_cache(audio_path, file_type=st.session_state['audio_format'])
        st.markdown(link, unsafe_allow_html=True)
+
943
def display_voice_tab():
    """
    🎙️ Render the voice-input tab: TTS voice/format settings in the sidebar,
    a message box that feeds process_voice_input, and the chat history.
    """

    # ▶ Voice Settings
    st.sidebar.markdown("### 🎤 Voice Settings")
    caption_female = 'Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    caption_male = 'Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'

    # Optionally, replace with your own local image or comment out.
    # FIX: catch Exception instead of a bare except (a bare except also
    # swallows SystemExit/KeyboardInterrupt and Streamlit control-flow).
    try:
        st.sidebar.image('Group Picture - Voices.png', caption=caption_female + ' | ' + caption_male)
    except Exception:
        st.sidebar.write('.')

    selected_voice = st.sidebar.selectbox(
        "👄 Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )

    st.sidebar.markdown("""
    # 🎙️ Voice Character Agent Selector 🎭
    *Female Voices*:
    - 🌸 **Aria** – Elegant, creative storytelling
    - 🎶 **Jenny** – Friendly, conversational
    - 🌺 **Sonia** – Bold, confident
    - 🌌 **Natasha** – Sophisticated, mysterious
    - 🌷 **Clara** – Cheerful, empathetic

    *Male Voices*:
    - 🌟 **Guy** – Authoritative, versatile
    - 🛠️ **Ryan** – Approachable, casual
    - 🎻 **William** – Classic, scholarly
    - 🌟 **Liam** – Energetic, engaging
    """)

    # ▶ Audio Format
    st.markdown("### 🔊 Audio Format")
    # FIX: initialize the radio from session state (previously always
    # index=0, so the widget could disagree with the stored format).
    selected_format = st.radio(
        "Choose Audio Format:",
        options=["MP3", "WAV"],
        index=0 if st.session_state['audio_format'] == 'mp3' else 1
    )

    # ▶ Persist any changed settings, then rerun so dependent widgets refresh
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # ▶ Text Input (newlines collapsed so TTS reads one continuous message)
    user_text = st.text_area("💬 Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # ▶ Send Button
    if st.button("📨 Send"):
        # process_voice_input is async; drive it to completion here.
        asyncio.run(process_voice_input(user_text))

    # ▶ Chat History
    st.subheader("📜 Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])
+
1012
def display_file_history_in_sidebar():
    """
    📂 Show a history of local .md, .mp3, .wav files (newest first),
    grouped by timestamp prefix, with previews, download links,
    and bulk delete / zip-all actions.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📂 File History")

    # ▶ Gather all files
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # ▶ Sort newest first
    all_files = sorted(all_files, key=os.path.getmtime, reverse=True)

    # Group files by their query prefix (timestamp_query)
    grouped_files = {}
    for f in all_files:
        fname = os.path.basename(f)
        prefix = '_'.join(fname.split('_')[:6])  # timestamp portion of the name
        if prefix not in grouped_files:
            grouped_files[prefix] = {'md': [], 'audio': [], 'loaded': False}

        ext = os.path.splitext(fname)[1].lower()
        if ext == '.md':
            grouped_files[prefix]['md'].append(f)
        elif ext in ['.mp3', '.wav']:
            grouped_files[prefix]['audio'].append(f)

    # Sort groups by timestamp (newest first)
    sorted_groups = sorted(grouped_files.items(), key=lambda x: x[0], reverse=True)

    # 🗑⬇️ Sidebar delete-all and zip-all download actions
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for f in all_files:
                os.remove(f)
            # FIX: removed an unreachable `should_rerun = True` assignment
            # that followed st.rerun() — st.rerun() raises, so any code
            # after it never executed.
            st.rerun()
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Display grouped files
    for prefix, files in sorted_groups:
        # Get a preview of content from the first MD file.
        # FIX: read one extra char so the "..." suffix can actually trigger
        # (previously read(200) could never exceed 200 chars, so the
        # ellipsis branch was dead code).
        preview = ""
        if files['md']:
            with open(files['md'][0], "r", encoding="utf-8") as f:
                preview = f.read(201).replace("\n", " ")
            if len(preview) > 200:
                preview = preview[:200] + "..."
        # Per-group session flag tracking whether full content is expanded
        group_key = f"group_{prefix}"
        if group_key not in st.session_state:
            st.session_state[group_key] = False

        # Display group expander
        with st.sidebar.expander(f"📑 Query Group: {prefix}"):
            st.write("**Preview:**")
            st.write(preview)

            # Load full content button
            if st.button("📖 View Full Content", key=f"btn_{prefix}"):
                st.session_state[group_key] = True

            # Only show full content and audio if the button was clicked
            if st.session_state[group_key]:
                # Display markdown files
                for md_file in files['md']:
                    with open(md_file, "r", encoding="utf-8") as f:
                        content = f.read()
                    st.markdown("**Full Content:**")
                    st.markdown(content)
                    st.markdown(get_download_link(md_file, file_type="md"),
                                unsafe_allow_html=True)

                # Display audio files (disabled by default to keep the
                # sidebar light; flip usePlaySidebar to enable playback)
                usePlaySidebar = False
                if usePlaySidebar:
                    for audio_file in files['audio']:
                        ext = os.path.splitext(audio_file)[1].replace('.', '')
                        st.audio(audio_file)
                        st.markdown(get_download_link(audio_file, file_type=ext),
                                    unsafe_allow_html=True)
1109
def main():
    """Top-level Streamlit page: marquee banner, action tabs (Voice / Media /
    ArXiv / Editor), custom input component, and sidebar file history."""
    # ▶ 1) Setup marquee UI in the sidebar
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # ▶ 2) Display the marquee welcome (banner overrides for larger text)
    display_marquee(
        st.session_state['marquee_content'],
        {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
        key_suffix="welcome"
    )

    # ▶ 3) Main action tabs and model use choices
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
                        horizontal=True)

    useArxiv = st.checkbox("Search Arxiv for Research Paper Answers", value=True)
    useArxivAudio = st.checkbox("Generate Audio File for Research Paper Answers", value=False)

    # ▶ 4) Show or hide custom component (optional example); returns user text
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello from MyComponent")

    if val:
        # Collapse literal "\n" sequences so the editor shows a single line
        val_stripped = val.replace('\\n', ' ')
        edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
        # NOTE(review): run_option is collected but not referenced below —
        # presumably reserved for future model routing; confirm.
        run_option = st.selectbox("Model:", ["Arxiv", "Other (demo)"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("⚙ AutoRun", value=True)
        with col2:
            full_audio = st.checkbox("📚FullAudio", value=False)

        # Only auto-run when the component value actually changed
        input_changed = (val != st.session_state.old_val)

        if autorun and input_changed:
            st.session_state.old_val = val
            st.session_state.last_query = edited_input
            perform_ai_lookup(edited_input,
                              vocal_summary=True,
                              extended_refs=False,
                              titles_summary=True,
                              full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
        else:
            if st.button("▶ Run"):
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                perform_ai_lookup(edited_input,
                                  vocal_summary=True,
                                  extended_refs=False,
                                  titles_summary=True,
                                  full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)

    # ─────────────────────────────────────────────────────────
    # TAB: ArXiv
    # ─────────────────────────────────────────────────────────
    if tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:", key="arxiv_query")

        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
        extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
        titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
        full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
        full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")

        if q and st.button("🔍Run"):
            st.session_state.last_query = q
            result = perform_ai_lookup(q,
                                       vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                # Persist the full response as a markdown transcript
                create_file(q, result, "md")

    # ─────────────────────────────────────────────────────────
    # TAB: Voice
    # ─────────────────────────────────────────────────────────
    elif tab_main == "🎤 Voice":
        display_voice_tab()

    # ─────────────────────────────────────────────────────────
    # TAB: Media
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📸 Media":
        st.header("📸 Media Gallery")
        tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])

        # ▶ AUDIO sub-tab
        with tabs[0]:
            st.subheader("🎵 Audio Files")
            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
            if audio_files:
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        st.audio(a)
                        ext = os.path.splitext(a)[1].replace('.', '')
                        dl_link = create_download_link_with_cache(a, file_type=ext)
                        st.markdown(dl_link, unsafe_allow_html=True)
            else:
                st.write("No audio files found.")

        # ▶ IMAGES sub-tab
        with tabs[1]:
            st.subheader("🖼 Image Files")
            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if imgs:
                # User-selectable column count for the image grid
                c = st.slider("Cols", 1, 5, 3, key="cols_images")
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
            else:
                st.write("No images found.")

        # ▶ VIDEO sub-tab
        with tabs[2]:
            st.subheader("🎥 Video Files")
            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
            if vids:
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)
            else:
                st.write("No videos found.")

    # ─────────────────────────────────────────────────────────
    # TAB: Editor
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📝 Editor":
        st.write("### 📝 File Editor (Minimal Demo)")
        st.write("Select or create a file to edit. More advanced features can be added as needed.")

    # ─────────────────────────────────────────────────────────
    # SIDEBAR: FILE HISTORY + PERFORMANCE METRICS
    # ─────────────────────────────────────────────────────────
    display_file_history_in_sidebar()
    log_performance_metrics()

    # ▶ Some light CSS styling
    st.markdown("""
    <style>
        .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
        .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
        .stButton>button { margin-right: 0.5rem; }
    </style>
    """, unsafe_allow_html=True)

    # ▶ Rerun once if some earlier action requested it via session state
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
+
1264
# Standard script entry point guard.
if __name__ == "__main__":
    main()