awacke1 commited on
Commit
cb1a7fe
·
verified ·
1 Parent(s): 386c5c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1282 -0
app.py ADDED
@@ -0,0 +1,1282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import openai
4
+ import base64
5
+ import cv2
6
+ import glob
7
+ import json
8
+ import math
9
+ import os
10
+ import pytz
11
+ import random
12
+ import re
13
+ import requests
14
+ #import textract
15
+ import time
16
+ import zipfile
17
+ import plotly.graph_objects as go
18
+ import streamlit.components.v1 as components
19
+ from datetime import datetime
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from bs4 import BeautifulSoup
22
+ from collections import defaultdict, deque, Counter
23
+ from dotenv import load_dotenv
24
+ from gradio_client import Client
25
+ from huggingface_hub import InferenceClient
26
+ from io import BytesIO
27
+ from PIL import Image
28
+ from PyPDF2 import PdfReader
29
+ from urllib.parse import quote
30
+ from xml.etree import ElementTree as ET
31
+ from openai import OpenAI
32
+ import extra_streamlit_components as stx
33
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
34
+ import asyncio
35
+ import edge_tts
36
+ from streamlit_marquee import streamlit_marquee
37
+ from typing import Tuple, Optional
38
+ import pandas as pd
39
+
40
+ # ─────────────────────────────────────────────────────────
41
+ # 1. CORE CONFIGURATION & SETUP
42
+ # ─────────────────────────────────────────────────────────
43
+
44
# App-wide Streamlit configuration — must run before any other st.* call.
MENU_ITEMS = {
    'Get Help': 'https://huggingface.co/awacke1',
    'Report a bug': 'https://huggingface.co/spaces/awacke1',
    'About': "🚲TalkingAIResearcher🏆",
}
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=MENU_ITEMS,
)

# Pull environment variables from a local .env file, if present.
load_dotenv()
56
+
57
+ # ▶ Available English voices for Edge TTS
58
# ▶ English voices accepted by Edge TTS; index 0 is the default voice.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural",
    "en-GB-SoniaNeural", "en-GB-RyanNeural",
    "en-AU-NatashaNeural", "en-AU-WilliamNeural",
    "en-CA-ClaraNeural", "en-CA-LiamNeural",
]
69
+
70
# ▶ Initialize Session State — each default is applied only when the key
#   is absent, so user state survives Streamlit reruns.
_SESSION_DEFAULTS = {
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    },
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
    # ▶ Additional keys for performance, caching, etc.
    'audio_cache': {},
    'download_link_cache': {},
    'operation_timings': {},
    'performance_metrics': defaultdict(list),
    'enable_audio': True,  # Turn TTS on/off
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
122
+
123
# ▶ API Keys — environment variables are read first; Streamlit secrets,
#   when present, override them.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai',"")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

# Configure both the legacy module-level key and the client-object API.
openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
# Hugging Face inference credentials / endpoint (may be None if unset).
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')
136
+
137
+ # ▶ Helper constants
138
# ▶ Emoji used when listing files, keyed by file extension.
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊"}
143
+
144
+ # ─────────────────────────────────────────────────────────
145
+ # 2. PERFORMANCE MONITORING & TIMING
146
+ # ─────────────────────────────────────────────────────────
147
+
148
class PerformanceTimer:
    """
    ⏱️ Context manager that times a named operation.

    Usage:
        with PerformanceTimer("my_operation"):
            ...
    On a clean (non-raising) exit the elapsed seconds are written to
    st.session_state['operation_timings'][name] and appended to
    st.session_state['performance_metrics'][name].
    """
    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time = None

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Record timings only when the body completed without raising.
        if exc_type is None:
            elapsed = time.time() - self.start_time
            st.session_state['operation_timings'][self.operation_name] = elapsed
            st.session_state['performance_metrics'][self.operation_name].append(elapsed)
170
+
171
def log_performance_metrics():
    """
    📈 Render performance data in the sidebar: total time, a per-operation
    breakdown, and a bar chart of average times per operation.
    """
    st.sidebar.markdown("### ⏱️ Performance Metrics")

    metrics = st.session_state['operation_timings']
    if metrics:
        total_time = sum(metrics.values())
        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

        # Per-operation share of the total.
        for operation, duration in metrics.items():
            share = (duration / total_time) * 100
            st.sidebar.write(f"**{operation}:** {duration:.2f}s ({share:.1f}%)")

        # Average time per operation, charted.
        history_data = [
            {"Operation": op, "Avg Time (s)": sum(times) / len(times)}
            for op, times in st.session_state['performance_metrics'].items()
            if times
        ]
        if history_data:
            st.sidebar.markdown("### 📊 Timing History (Avg)")
            st.sidebar.bar_chart(pd.DataFrame(history_data).set_index("Operation"))
199
+
200
+ # ─────────────────────────────────────────────────────────
201
+ # 3. HELPER FUNCTIONS (FILENAMES, LINKS, MARQUEE, ETC.)
202
+ # ─────────────────────────────────────────────────────────
203
+
204
def get_central_time():
    """🌎 Return the current timezone-aware datetime in US Central time."""
    return datetime.now(pytz.timezone('US/Central'))
208
+
209
def format_timestamp_prefix():
    """📅 Timestamp prefix for filenames, e.g. '20240101_120000' (US Central)."""
    return get_central_time().strftime("%Y%m%d_%H%M%S")
214
+
215
def initialize_marquee_settings():
    """🌈 Seed default marquee styling into session state when absent."""
    if 'marquee_settings' in st.session_state:
        return
    st.session_state['marquee_settings'] = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
226
+
227
def get_marquee_settings():
    """🔧 Return the marquee settings dict, seeding defaults first."""
    initialize_marquee_settings()
    return st.session_state['marquee_settings']
231
+
232
def update_marquee_settings_ui():
    """🖌 Sidebar controls (color pickers + sliders) that update marquee styling."""
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left_col, right_col = st.sidebar.columns(2)

    with left_col:
        bg_color = st.color_picker("🎨 Background",
                                   st.session_state['marquee_settings']["background"],
                                   key="bg_color_picker")
        text_color = st.color_picker("✍️ Text",
                                     st.session_state['marquee_settings']["color"],
                                     key="text_color_picker")
    with right_col:
        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed (secs)", 1, 20, 20, key="duration_slider")

    # Push the widget values back into the shared settings dict.
    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s",
    })
253
+
254
def display_marquee(text, settings, key_suffix=""):
    """
    🎉 Render a styled marquee for `text`, truncating to ~280 chars so the
    ticker does not overflow. `settings` is splatted into streamlit_marquee.
    """
    shown = text if len(text) <= 280 else text[:280] + "..."
    streamlit_marquee(
        content=shown,
        **settings,
        key=f"marquee_{key_suffix}",
    )
    st.write("")
266
+
267
def get_high_info_terms(text: str, top_n=10) -> list:
    """
    📌 Return the top_n most frequent unigrams and bigrams in `text`,
    excluding common stopwords. Used to derive descriptive keywords.
    """
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    bi_grams = [f"{a} {b}" for a, b in zip(words, words[1:])]
    candidates = [
        t for t in words + bi_grams
        if t not in stop_words and len(t.split()) <= 2
    ]
    return [term for term, _ in Counter(candidates).most_common(top_n)]
279
+
280
def clean_text_for_filename(text: str) -> str:
    """
    🏷️ Build a filename-safe token string: lowercase, strip punctuation,
    drop words of <=3 chars and a few low-value words, join with '_'
    and cap at 200 chars.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    stop_short = {'the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'}
    kept = [w for w in cleaned.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(kept)[:200]
291
+
292
def generate_filename(prompt, response, file_type="md", max_length=200):
    """
    📁 Build '<timestamp>_<keywords>.<ext>' from prompt+response:
    top info terms + a cleaned snippet, de-duplicated in order, then
    truncated so the whole name fits within max_length.
    """
    prefix = format_timestamp_prefix() + "_"
    combined_text = (prompt + " " + response)[:200]
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet_cleaned = clean_text_for_filename((prompt[:40] + " " + response[:40]).strip())

    # De-duplicate while preserving first-seen order.
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))

    full_name = '_'.join(unique_parts).strip('_')
    # Budget for the name once the prefix, dot and extension are accounted for.
    budget = max_length - len(prefix) - len(file_type) - 1
    return f"{prefix}{full_name[:budget]}.{file_type}"
321
+
322
def create_file(prompt, response, file_type="md"):
    """
    📝 Write `prompt`, a blank line, then `response` to a file whose name
    is derived from the content. Returns the created filename.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{prompt}\n\n{response}")
    return filename
331
+
332
+
333
+
334
def get_download_link(file, file_type="zip"):
    """
    ⬇️ Return an HTML <a> tag whose href embeds the file as a base64
    data URI, with a MIME type and label chosen from the file type.
    """
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    name = os.path.basename(file)

    # (MIME type, link label) per supported extension.
    type_map = {
        "zip": ("application/zip", "📂 Download"),
        "mp3": ("audio/mpeg", "🎵 Download"),
        "wav": ("audio/wav", "🔊 Download"),
        "md": ("text/markdown", "📝 Download"),
    }
    mime, label = type_map.get(file_type, ("application/octet-stream", "Download"))
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{label} {name}</a>'
350
+
351
def clean_for_speech(text: str) -> str:
    """
    🔉 Basic TTS cleanup: flatten newlines, drop '</s>' tokens and '#'
    characters, strip parenthesized URLs, and collapse whitespace.
    (A richer version later in the file supersedes this definition.)
    """
    for old, new in (("\n", " "), ("</s>", " "), ("#", "")):
        text = text.replace(old, new)
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", text).strip()
359
+
360
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """
    🎶 Asynchronously synthesize `text` to an audio file with edge-tts.
    Returns the generated filename, or None when the cleaned text is empty.
    """
    text = clean_for_speech(text)
    if not text.strip():
        return None
    # edge-tts expects signed percentage / Hz offsets as strings.
    communicate = edge_tts.Communicate(
        text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz"
    )
    out_fn = generate_filename(text, text, file_type=file_format)
    await communicate.save(out_fn)
    return out_fn
371
+
372
def sync_edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """
    Synchronous TTS generation with edge-tts.

    Bug fix: the original built the Communicate object but left the
    `save` call commented out, returning a filename for a file that was
    never written. The coroutine is now driven to completion with
    asyncio.run so the file exists when the function returns.

    Returns the generated filename, or None when the cleaned text is empty.
    """
    text = clean_for_speech(text)
    if not text.strip():
        return None
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    out_fn = generate_filename(text, text, file_type=file_format)
    # Actually write the audio (must not be called from a running event loop).
    asyncio.run(communicate.save(out_fn))
    return out_fn
383
+
384
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """
    Synchronous wrapper around the async TTS generator.

    Bug fix: the original used `await` inside a plain `def` (a
    SyntaxError) and returned nothing, so callers such as
    save_qa_with_audio always received None. The coroutine is now run
    with asyncio.run and the generated audio filename is returned
    (None when the cleaned text is empty).
    """
    return asyncio.run(
        edge_tts_generate_audio(text, voice, rate, pitch, file_format)
    )
388
+
389
def play_and_download_audio(file_path, file_type="mp3"):
    """🔊 Embed an audio player plus a base64 download link for an existing file."""
    if not (file_path and os.path.exists(file_path)):
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type), unsafe_allow_html=True)
395
+
396
def save_qa_with_audio(question, answer, voice=None):
    """
    Persist a Q&A pair as markdown and synthesize a spoken version.

    Uses the session-selected voice when `voice` is None. Returns
    (md_file, audio_file). (Removed an unused `combined_text` local
    from the original.)
    """
    voice = voice or st.session_state['tts_voice']
    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=voice,
        file_format=st.session_state['audio_format'],
    )
    return md_file, audio_file
410
+
411
+
412
+ # ─────────────────────────────────────────────────────────
413
+ # 4. OPTIMIZED AUDIO GENERATION (ASYNC TTS + CACHING)
414
+ # ─────────────────────────────────────────────────────────
415
+
416
def clean_for_speech(text: str) -> str:
    """
    🔉 Enhanced TTS cleanup: strips code blocks, markdown headers, links,
    emphasis markers, URLs, '</s>' tokens and excess whitespace.
    (This redefinition supersedes the simpler clean_for_speech above.)

    Bug fix: code blocks are now removed BEFORE emphasis markers. The
    original stripped backticks first via r'[*_~`]', so the fenced and
    inline code patterns could never match and code content leaked into
    the speech.
    """
    with PerformanceTimer("text_cleaning"):
        # Remove fenced and inline code blocks first (requires backticks intact).
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)
        # Remove markdown headers.
        text = re.sub(r'#+ ', '', text)
        # Collapse [text](url) links to their visible text.
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        # Remove emphasis markers (*, _, ~, and any stray backticks).
        text = re.sub(r'[*_~`]', '', text)
        # Collapse whitespace and newlines.
        text = re.sub(r'\s+', ' ', text).replace("\n", " ")
        # Remove hidden sentence-end tokens.
        text = text.replace("</s>", " ")
        # Remove bare and parenthesized URLs.
        text = re.sub(r'https?://\S+', '', text)
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        return text.strip()
440
+
441
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """
    🎶 Asynchronous TTS generation with caching and performance tracking.

    Cleans the text, consults the session-level audio cache, and only
    synthesizes via edge-tts on a cache miss. Returns
    (filename, generation_time_seconds); cache hits report time 0, and
    empty text or a synthesis error yields (None, 0).
    """
    with PerformanceTimer("tts_generation") as timer:
        # ▶ Clean & validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # ▶ Check cache (avoid regenerating the same TTS)
        # NOTE(review): the key truncates text to its first 100 chars, so
        # two long texts sharing a prefix could collide — confirm acceptable.
        cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
        if cache_key in st.session_state['audio_cache']:
            return st.session_state['audio_cache'][cache_key], 0

        try:
            # ▶ Generate audio (edge-tts wants signed "%"/"Hz" offset strings)
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # ▶ Generate unique filename (timestamp + random suffix)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            # ▶ Save audio file
            await communicate.save(filename)

            # ▶ Store in cache
            st.session_state['audio_cache'][cache_key] = filename

            # ▶ Return path + timing
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"❌ Error generating audio: {str(e)}")
            return None, 0
485
+
486
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """
    📝 Save a Q&A pair to markdown and, when audio is globally enabled,
    synthesize TTS for it. Returns (md_file, audio_file, md_time, audio_time);
    audio_file is None and audio_time 0 when TTS is disabled.
    """
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save"):
        # Markdown is written first and timed separately from the audio.
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        audio_file, audio_time = None, 0
        if st.session_state['enable_audio']:
            audio_file, audio_time = await async_edge_tts_generate(
                f"{question}\n\nAnswer: {answer}",
                voice=voice,
                file_format=st.session_state['audio_format'],
            )

        return md_file, audio_file, md_time, audio_time
515
+
516
def save_qa_with_audio(question, answer, voice=None):
    """
    Synchronous Q&A save + TTS. (Duplicate definition — this later one
    shadows the identical version defined earlier in the file.)

    Returns (md_file, audio_file). (Removed an unused `combined_text`
    local from the original.)
    """
    voice = voice or st.session_state['tts_voice']
    md_file = create_file(question, answer, "md")
    audio_text = f"{question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format'],
    )
    return md_file, audio_file
530
+
531
+
532
+
533
+
534
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """
    ⬇️ Build (and cache in session state) a base64 download link for a file.

    Bug fix: the original hard-coded the literal "(unknown)" as both the
    download attribute and the link label, leaving the computed `filename`
    variable unused; links now carry the real basename.

    Returns the HTML anchor string, or "" on error.
    """
    with PerformanceTimer("download_link_generation"):
        cache_key = f"dl_{file_path}"
        if cache_key in st.session_state['download_link_cache']:
            return st.session_state['download_link_cache'][cache_key]

        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            filename = os.path.basename(file_path)

            if file_type == "mp3":
                link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
            elif file_type == "wav":
                link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
            elif file_type == "md":
                link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
            else:
                link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">⬇️ Download {filename}</a>'

            st.session_state['download_link_cache'][cache_key] = link
            return link

        except Exception as e:
            st.error(f"❌ Error creating download link: {str(e)}")
            return ""
563
+
564
+ # ─────────────────────────────────────────────────────────
565
+ # 5. RESEARCH / ARXIV FUNCTIONS
566
+ # ─────────────────────────────────────────────────────────
567
+
568
def parse_arxiv_refs(ref_text: str):
    """
    📜 Parse multi-line markdown of Arxiv references into paper dicts:
    {date, title, url, authors, summary, full_audio, download_base64}.

    A line containing exactly two '|' characters starts a new paper; the
    first following line is treated as authors, later lines accumulate
    into the summary. The result is capped at 20 papers.
    """
    if not ref_text:
        return []

    results = []
    current_paper = {}

    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            # Header line: flush the previous paper first (stop at 20).
            if current_paper:
                results.append(current_paper)
                if len(results) >= 20:
                    break
            try:
                date_part, title_part, _ = line.strip('* ').split('|')
                url_match = re.search(r'(https://arxiv.org/\S+)', line)
                current_paper = {
                    'date': date_part.strip(),
                    'title': title_part.strip(),
                    'url': url_match.group(1) if url_match else f"paper_{len(results)}",
                    'authors': '',
                    'summary': '',
                    'full_audio': None,
                    'download_base64': '',
                }
            except Exception as e:
                st.warning(f"⚠️ Error parsing paper header: {str(e)}")
                current_paper = {}
        elif current_paper:
            # First continuation line is the authors; the rest is summary.
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            elif current_paper['summary']:
                current_paper['summary'] += ' ' + line.strip()
            else:
                current_paper['summary'] = line.strip()

    if current_paper:
        results.append(current_paper)

    return results[:20]
620
+
621
def create_paper_links_md(papers):
    """🔗 Build a minimal markdown page of numbered Arxiv links."""
    out = ["# Paper Links\n"]
    out.extend(
        f"{idx}. **{p['title']}** — [Arxiv]({p['url']})"
        for idx, p in enumerate(papers, start=1)
    )
    return "\n".join(out)
629
+
630
async def create_paper_audio_files(papers, input_question):
    """
    🎧 For each paper, synthesize a TTS summary, storing the file path in
    paper['full_audio'] and a base64 download link in
    paper['download_base64']. Failures leave (None, '') for that paper.
    """
    for paper in papers:
        try:
            speech = clean_for_speech(
                f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            )
            file_format = st.session_state['audio_format']
            audio_file, _ = await async_edge_tts_generate(
                speech,
                voice=st.session_state['tts_voice'],
                file_format=file_format,
            )
            paper['full_audio'] = audio_file

            if audio_file:
                # Attach a ready-to-render base64 download link.
                paper['download_base64'] = create_download_link_with_cache(
                    audio_file, file_type=file_format
                )

        except Exception as e:
            st.warning(f"⚠️ Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
657
+
658
def display_papers(papers, marquee_settings):
    """📑 Render each paper as a marquee plus an expander with audio/link."""
    st.write("## 🔎 Research Papers")
    for idx, paper in enumerate(papers, start=1):
        display_marquee(
            f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}",
            marquee_settings,
            key_suffix=f"paper_{idx}",
        )

        with st.expander(f"{idx}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** — [Arxiv Link]({paper['url']})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            if paper.get('full_audio'):
                st.write("📚 **Paper Audio**")
                st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
676
+
677
def display_papers_in_sidebar(papers):
    """🔎 Sidebar mirror of the paper list: link, audio, authors, summary."""
    st.sidebar.title("🎶 Papers & Audio")
    for idx, paper in enumerate(papers, start=1):
        with st.sidebar.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            if paper['full_audio']:
                st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            if paper['summary']:
                st.markdown(f"**Summary:** {paper['summary'][:300]}...")
692
+
693
+ # ─────────────────────────────────────────────────────────
694
+ # 6. ZIP FUNCTION
695
+ # ─────────────────────────────────────────────────────────
696
+
697
def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """
    📦 Zip all produced md/mp3/wav files (excluding README.md) under a
    short keyword-based name. Returns the zip filename, or None when
    there is nothing to archive.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    # Gather text to derive naming keywords: markdown contents, audio
    # basenames (underscores as spaces), plus the originating question.
    all_content = []
    for f in all_files:
        if f.endswith('.md'):
            with open(f, "r", encoding='utf-8') as fh:
                all_content.append(fh.read())
        elif f.endswith(('.mp3', '.wav')):
            stem = os.path.splitext(os.path.basename(f))[0]
            all_content.append(stem.replace('_', ' '))
    all_content.append(input_question)

    info_terms = get_high_info_terms(" ".join(all_content), top_n=10)
    name_text = '-'.join(info_terms[:5])
    # Keep the archive name short: truncated timestamp+keywords, then '.zip'.
    short_zip_name = (format_timestamp_prefix() + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as zf:
        for f in all_files:
            zf.write(f)
    return short_zip_name
729
+
730
+ # ─────────────────────────────────────────────────────────
731
+ # 7. MAIN AI LOGIC: LOOKUP & TAB HANDLERS
732
+ # ─────────────────────────────────────────────────────────
733
+
734
+
735
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline.

    Pipeline (all output is rendered directly onto the Streamlit page):
      1. Ask Claude the raw question, render the reply, save it to disk, and
         produce a narrated audio version.
      2. If ``useArxiv``: append Claude's answer to the query ("prompt
         fortification") and run a semantic ArXiv search on a remote Gradio
         Space, then render/voice the combined result.
      3. Parse the returned references into paper records and display them
         (optionally synthesizing one audio file per paper).
      4. Ask Claude a second time to turn the question + paper list into a
         Streamlit app.py / requirements.txt demo.

    Args:
        q: The user's question text.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility with callers; not used in this
            routine.
        useArxiv: Run the ArXiv RAG step (and the follow-up Claude call).
        useArxivAudio: Also synthesize audio for each returned paper.

    Returns:
        str: Claude's reply, or (when ``useArxiv``) the fortified query plus
        the ArXiv references markdown.
    """
    start = time.time()
    # NOTE(review): ai_constitution is built but never passed to the model —
    # presumably intended as a system prompt; confirm intended use.
    ai_constitution = """
    You are a medical and machine learning review board expert and streamlit python and html5 expert. You are tasked with creating a streamlit app.py and requirements.txt for a solution that answers the questions with a working app to demonstrate. You are to use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.
    """

    # --- 1) Claude API: direct answer to the user's question.
    client = anthropic.Anthropic(api_key=anthropic_key)
    user_input = q
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": user_input}
        ])
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    # Save the Q/A pair & produce audio for the main response.
    result = response.content[0].text
    create_file(q, result)
    md_file, audio_file = save_qa_with_audio(q, result)
    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    if useArxiv:
        q = q + result  # Feed Arxiv the question and Claude's answer for prompt fortification to get better answers and references
        # --- 2) Arxiv RAG via remote Gradio Space.
        #st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
        st.write('Running Arxiv RAG with Claude inputs.')
        #st.code(q, language="python", line_numbers=True, wrap_lines=True)

        # predict() returns a tuple; element [0] is the references markdown.
        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = client.predict(
            q,
            10,
            "Semantic Search",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            api_name="/update_with_rag_md"
        )[0]

        #r2 = client.predict(
        #    q,
        #    "mistralai/Mixtral-8x7B-Instruct-v0.1",
        #    True,
        #    api_name="/ask_llm"
        #)
        #result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"

        # Pair the (fortified) question with the paper references.
        result = f"🔎 {q}\n\n{refs}"

        md_file, audio_file = save_qa_with_audio(q, result)
        st.subheader("📝 Main Response Audio")
        play_and_download_audio(audio_file, st.session_state['audio_format'])

        # --- 3) Parse + handle papers returned by the RAG step.
        papers = parse_arxiv_refs(refs)
        if papers:
            # Create minimal links page first so the user sees results fast.
            paper_links = create_paper_links_md(papers)
            links_file = create_file(q, paper_links, "md")
            st.markdown(paper_links)

            # Then create audio for each paper (opt-in; this is slow).
            if useArxivAudio:
                create_paper_audio_files(papers, input_question=q)

            display_papers(papers, get_marquee_settings())  # scrolling marquee per paper and summary

            display_papers_in_sidebar(papers)  # sidebar entry per paper and summary
        else:
            st.warning("No papers found in the response.")

        # --- 4) Claude again: turn question + paper list into app.py code.
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q + '\n\n' + 'Use the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with python libraries for creating a single app.py application that answers the questions with working code to demonstrate.' + '\n\n'
        response = client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[
                {"role": "user", "content": user_input}
            ])
        r2 = response.content[0].text
        st.write("Claude's reply 🧠:")
        st.markdown(r2)

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
830
+
831
+
832
+
833
+
834
+
835
+
836
+
837
def perform_ai_lookup_old(
    q,
    vocal_summary=True,
    extended_refs=False,
    titles_summary=True,
    full_audio=False
):
    """
    🔮 Legacy lookup: Anthropic (Claude) via the older Completions API.

    Superseded by perform_ai_lookup(); kept for reference. Sends the raw
    question to "claude-2" with the HUMAN_PROMPT/AI_PROMPT framing, renders
    the reply, appends it to st.session_state.chat_history, and returns it.

    Args:
        q: User's question text.
        vocal_summary, extended_refs, titles_summary, full_audio: Unused
            here; kept for signature compatibility with perform_ai_lookup().

    Returns:
        str: Claude's stripped completion text.
    """
    with PerformanceTimer("ai_lookup"):
        start = time.time()

        # ▶ Example call to Anthropic (Claude) — legacy completions endpoint.
        client = anthropic.Anthropic(api_key=anthropic_key)
        user_input = q

        # Here we do a minimal prompt, just to show the call
        # (You can enhance your prompt engineering as needed)
        response = client.completions.create(
            model="claude-2",
            max_tokens_to_sample=512,
            prompt=f"{anthropic.HUMAN_PROMPT} {user_input}{anthropic.AI_PROMPT}"
        )

        result_text = response.completion.strip()

        # ▶ Print and store
        st.write("### Claude's reply 🧠:")
        st.markdown(result_text)

        # Save & produce audio (currently disabled)
        #create_file(q, result_text)
        #md_file, audio_file = save_qa_with_audio(q, result_text)
        #st.subheader("📝 Main Response Audio")
        #play_and_download_audio(audio_file, st.session_state['audio_format'])

        # ▶ Add to the chat history so the Voice tab can display it.
        st.session_state.chat_history.append({"user": q, "claude": result_text})

        # ▶ Return final text
        end = time.time()
        st.write(f"**Elapsed:** {end - start:.2f}s")

        return result_text
886
+
887
async def process_voice_input(text):
    """
    🎤 Handle a voice-submitted query.

    Runs the AI lookup on the given text, then persists the answer as a
    markdown file plus narrated audio (async) and surfaces download links.
    Empty input is a no-op.
    """
    if not text:
        return

    st.subheader("🔍 Search Results")

    # Run the full AI pipeline on the spoken query.
    answer = perform_ai_lookup(
        text,
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True
    )

    # Persist the Q&A pair as markdown + audio, timing both steps.
    md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(text, answer)

    st.subheader("📝 Generated Files")
    st.write(f"**Markdown:** {md_file} (saved in {md_time:.2f}s)")

    if not audio_file:
        return
    st.write(f"**Audio:** {audio_file} (generated in {audio_time:.2f}s)")
    st.audio(audio_file)
    link_md = create_download_link_with_cache(audio_file, file_type=st.session_state['audio_format'])
    st.markdown(link_md, unsafe_allow_html=True)
915
+
916
def display_voice_tab():
    """
    🎙️ Render the voice-input tab: TTS voice/format pickers in the sidebar,
    a message box, and the running chat history.

    Side effects:
        - Updates st.session_state['tts_voice'] / ['audio_format'] and
          triggers st.rerun() when either selection changes.
        - On Send, drives the async process_voice_input() via asyncio.run().
    """

    # ▶ Voice Settings
    st.sidebar.markdown("### 🎤 Voice Settings")
    caption_female = 'Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    caption_male = 'Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'

    # Optional illustration; fall back quietly when the image is missing.
    # (Fixed: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt — narrowed to Exception.)
    try:
        st.sidebar.image('Group Picture - Voices.png', caption=caption_female + ' | ' + caption_male)
    except Exception:
        st.sidebar.write('.')

    selected_voice = st.sidebar.selectbox(
        "👄 Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )

    st.sidebar.markdown("""
    # 🎙️ Voice Character Agent Selector 🎭
    *Female Voices*:
    - 🌸 **Aria** – Elegant, creative storytelling
    - 🎶 **Jenny** – Friendly, conversational
    - 🌺 **Sonia** – Bold, confident
    - 🌌 **Natasha** – Sophisticated, mysterious
    - 🌷 **Clara** – Cheerful, empathetic

    *Male Voices*:
    - 🌟 **Guy** – Authoritative, versatile
    - 🛠️ **Ryan** – Approachable, casual
    - 🎻 **William** – Classic, scholarly
    - 🌟 **Liam** – Energetic, engaging
    """)

    # ▶ Audio Format
    st.markdown("### 🔊 Audio Format")
    selected_format = st.radio(
        "Choose Audio Format:",
        options=["MP3", "WAV"],
        index=0
    )

    # ▶ Persist selection changes, rerunning so widgets reflect new state.
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # ▶ Text Input (newlines collapsed so TTS reads one continuous message)
    user_text = st.text_area("💬 Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # ▶ Send Button — process_voice_input is an async function, drive it here.
    if st.button("📨 Send"):
        asyncio.run(process_voice_input(user_text))

    # ▶ Chat History
    st.subheader("📜 Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])
985
+
986
+ # ─────────────────────────────────────────────────────────
987
+ # FILE HISTORY SIDEBAR
988
+ # ─────────────────────────────────────────────────────────
989
+
990
def display_file_history_in_sidebar():
    """
    📂 Sidebar history of local .md/.mp3/.wav files, grouped by the
    timestamp/query prefix of each filename, newest group first.

    Provides:
        - "Delete All": removes every listed file, then reruns the app.
        - "Zip All": bundles files via create_zip_of_files and links it.
        - Per-group expanders with a content preview, on-demand full-content
          loading, and download links.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📂 File History")

    # ▶ Gather all files of interest in the working directory.
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # ▶ Sort newest first.
    all_files = sorted(all_files, key=os.path.getmtime, reverse=True)

    # Group files by their query prefix: the first six underscore-separated
    # fields of the filename (timestamp + query stem).
    grouped_files = {}
    for path in all_files:
        fname = os.path.basename(path)
        prefix = '_'.join(fname.split('_')[:6])
        if prefix not in grouped_files:
            grouped_files[prefix] = {'md': [], 'audio': [], 'loaded': False}

        ext = os.path.splitext(fname)[1].lower()
        if ext == '.md':
            grouped_files[prefix]['md'].append(path)
        elif ext in ['.mp3', '.wav']:
            grouped_files[prefix]['audio'].append(path)

    # Sort groups by prefix (timestamp leads the prefix) — newest first.
    sorted_groups = sorted(grouped_files.items(), key=lambda x: x[0], reverse=True)

    # 🗑⬇️ Sidebar: delete-all and zip-all-download controls.
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for path in all_files:
                os.remove(path)
            # Fixed: set the flag *before* rerunning — st.rerun() halts the
            # script, so the original post-rerun assignment never executed.
            st.session_state.should_rerun = True
            st.rerun()
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Display grouped files.
    for prefix, files in sorted_groups:
        # Preview the first 200 chars of the group's first markdown file.
        preview = ""
        if files['md']:
            with open(files['md'][0], "r", encoding="utf-8") as fh:
                preview = fh.read(200).replace("\n", " ")
            # Fixed: read(200) returns at most 200 chars, so the original
            # `len(preview) > 200` could never be true; a full 200-char read
            # means the file may continue, so mark it truncated.
            if len(preview) == 200:
                preview += "..."

        # Session-state flag: has this group been expanded to full view?
        group_key = f"group_{prefix}"
        if group_key not in st.session_state:
            st.session_state[group_key] = False

        # Display group expander.
        with st.sidebar.expander(f"📑 Query Group: {prefix}"):
            st.write("**Preview:**")
            st.write(preview)

            # Defer loading full content until explicitly requested.
            if st.button("📖 View Full Content", key=f"btn_{prefix}"):
                st.session_state[group_key] = True

            # Only show full content and audio if the button was clicked.
            if st.session_state[group_key]:
                # Render every markdown file in the group with a download link.
                for md_file in files['md']:
                    with open(md_file, "r", encoding="utf-8") as fh:
                        content = fh.read()
                    st.markdown("**Full Content:**")
                    st.markdown(content)
                    st.markdown(get_download_link(md_file, file_type="md"),
                                unsafe_allow_html=True)

                # Audio playback in the sidebar is currently disabled.
                usePlaySidebar = False
                if usePlaySidebar:
                    for audio_file in files['audio']:
                        ext = os.path.splitext(audio_file)[1].replace('.', '')
                        st.audio(audio_file)
                        st.markdown(get_download_link(audio_file, file_type=ext),
                                    unsafe_allow_html=True)
1114
+
1115
+
1116
+
1117
+
1118
+ # ─────────────────────────────────────────────────────────
1119
+ # MAIN APP
1120
+ # ─────────────────────────────────────────────────────────
1121
+
1122
def main():
    """
    🚀 App entry point.

    Renders the marquee banner, the top-level action radio (Voice / Media /
    ArXiv / Editor), global ArXiv toggles, an optional custom input
    component that can auto-run lookups, then the selected tab's UI,
    followed by the sidebar file history, performance metrics, CSS styling,
    and a deferred rerun driven by st.session_state.should_rerun.
    """
    # ▶ 1) Setup marquee UI in the sidebar
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # ▶ 2) Display the marquee welcome (larger font for the banner)
    display_marquee(
        st.session_state['marquee_content'],
        {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
        key_suffix="welcome"
    )

    # ▶ 3) Main action tabs and model use choices
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
                        horizontal=True)

    useArxiv = st.checkbox("Search Arxiv for Research Paper Answers", value=True)
    useArxivAudio = st.checkbox("Generate Audio File for Research Paper Answers", value=False)

    # ▶ 4) Custom component (returns the user's input value, if any)
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello from MyComponent")

    if val:
        # NOTE(review): this replaces the two-character sequence backslash+n,
        # not actual newlines — confirm whether '\n' was intended.
        val_stripped = val.replace('\\n', ' ')
        edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
        run_option = st.selectbox("Model:", ["Arxiv", "Other (demo)"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("⚙ AutoRun", value=True)
        with col2:
            full_audio = st.checkbox("📚FullAudio", value=False)

        # Only re-trigger the lookup when the component value changed.
        input_changed = (val != st.session_state.old_val)

        if autorun and input_changed:
            st.session_state.old_val = val
            st.session_state.last_query = edited_input
            perform_ai_lookup(edited_input,
                              vocal_summary=True,
                              extended_refs=False,
                              titles_summary=True,
                              full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)
        else:
            if st.button("▶ Run"):
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                perform_ai_lookup(edited_input,
                                  vocal_summary=True,
                                  extended_refs=False,
                                  titles_summary=True,
                                  full_audio=full_audio, useArxiv=useArxiv, useArxivAudio=useArxivAudio)

    # ─────────────────────────────────────────────────────────
    # TAB: ArXiv — free-form query with lookup options
    # ─────────────────────────────────────────────────────────
    if tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:", key="arxiv_query")

        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
        extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
        titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
        full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
        full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")

        if q and st.button("🔍Run"):
            st.session_state.last_query = q
            result = perform_ai_lookup(q,
                                       vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                create_file(q, result, "md")

    # ─────────────────────────────────────────────────────────
    # TAB: Voice
    # ─────────────────────────────────────────────────────────
    elif tab_main == "🎤 Voice":
        display_voice_tab()

    # ─────────────────────────────────────────────────────────
    # TAB: Media — audio / image / video galleries of local files
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📸 Media":
        st.header("📸 Media Gallery")
        tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])

        # ▶ AUDIO sub-tab
        with tabs[0]:
            st.subheader("🎵 Audio Files")
            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
            if audio_files:
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        st.audio(a)
                        ext = os.path.splitext(a)[1].replace('.', '')
                        dl_link = create_download_link_with_cache(a, file_type=ext)
                        st.markdown(dl_link, unsafe_allow_html=True)
            else:
                st.write("No audio files found.")

        # ▶ IMAGES sub-tab
        with tabs[1]:
            st.subheader("🖼 Image Files")
            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3, key="cols_images")
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
            else:
                st.write("No images found.")

        # ▶ VIDEO sub-tab
        with tabs[2]:
            st.subheader("🎥 Video Files")
            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
            if vids:
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)
            else:
                st.write("No videos found.")

    # ─────────────────────────────────────────────────────────
    # TAB: Editor (placeholder)
    # ─────────────────────────────────────────────────────────
    elif tab_main == "📝 Editor":
        st.write("### 📝 File Editor (Minimal Demo)")
        st.write("Select or create a file to edit. More advanced features can be added as needed.")

    # ─────────────────────────────────────────────────────────
    # SIDEBAR: FILE HISTORY + PERFORMANCE METRICS
    # ─────────────────────────────────────────────────────────
    display_file_history_in_sidebar()
    log_performance_metrics()

    # ▶ Some light CSS styling
    st.markdown("""
    <style>
        .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
        .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
        .stButton>button { margin-right: 0.5rem; }
    </style>
    """, unsafe_allow_html=True)

    # ▶ Deferred rerun: clear the flag first so we don't loop forever.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
1276
+
1277
+ # ─────────────────────────────────────────────────────────
1278
+ # 8. RUN APP
1279
+ # ─────────────────────────────────────────────────────────
1280
+
1281
# Standard script entry guard: run the Streamlit app.
if __name__ == "__main__":
    main()