awacke1 committed on
Commit
6ef6604
·
verified ·
1 Parent(s): a484f78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +648 -1690
app.py CHANGED
@@ -1,19 +1,5 @@
1
  import streamlit as st
2
- import anthropic
3
- import openai
4
- import base64
5
- import cv2
6
- import glob
7
- import json
8
- import math
9
- import os
10
- import pytz
11
- import random
12
- import re
13
- import requests
14
- import textract
15
- import time
16
- import zipfile
17
  import plotly.graph_objects as go
18
  import streamlit.components.v1 as components
19
  from datetime import datetime
@@ -34,9 +20,6 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
34
  import asyncio
35
  import edge_tts
36
  from streamlit_marquee import streamlit_marquee
37
- from concurrent.futures import ThreadPoolExecutor
38
- from functools import partial
39
- from typing import Dict, List, Optional, Tuple, Union
40
 
41
  # ─────────────────────────────────────────────────────────
42
  # 1. CORE CONFIGURATION & SETUP
@@ -52,7 +35,6 @@ st.set_page_config(
52
  'About': "๐ŸšฒTalkingAIResearcher๐Ÿ†"
53
  }
54
  )
55
-
56
  load_dotenv()
57
 
58
  # Available English voices for Edge TTS
@@ -68,51 +50,70 @@ EDGE_TTS_VOICES = [
68
  "en-CA-LiamNeural"
69
  ]
70
 
71
- # Session state initialization with default values
72
- DEFAULT_SESSION_STATE = {
73
- 'marquee_settings': {
74
  "background": "#1E1E1E",
75
  "color": "#FFFFFF",
76
  "font-size": "14px",
77
  "animationDuration": "20s",
78
  "width": "100%",
79
  "lineHeight": "35px"
80
- },
81
- 'tts_voice': EDGE_TTS_VOICES[0],
82
- 'audio_format': 'mp3',
83
- 'transcript_history': [],
84
- 'chat_history': [],
85
- 'openai_model': "gpt-4o-2024-05-13",
86
- 'messages': [],
87
- 'last_voice_input': "",
88
- 'editing_file': None,
89
- 'edit_new_name': "",
90
- 'edit_new_content': "",
91
- 'viewing_prefix': None,
92
- 'should_rerun': False,
93
- 'old_val': None,
94
- 'last_query': "",
95
- 'marquee_content': "๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant",
96
- 'enable_audio': False,
97
- 'enable_download': False,
98
- 'enable_claude': True,
99
- 'audio_cache': {},
100
- 'paper_cache': {},
101
- 'download_link_cache': {},
102
- 'performance_metrics': defaultdict(list),
103
- 'operation_timings': defaultdict(float)
104
- }
 
 
 
 
 
 
105
 
106
- # Initialize session state
107
- for key, value in DEFAULT_SESSION_STATE.items():
108
- if key not in st.session_state:
109
- st.session_state[key] = value
110
 
111
- # API Keys and Configuration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
113
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
114
- xai_key = os.getenv('xai', "")
115
-
116
  if 'OPENAI_API_KEY' in st.secrets:
117
  openai_api_key = st.secrets['OPENAI_API_KEY']
118
  if 'ANTHROPIC_API_KEY' in st.secrets:
@@ -123,1012 +124,35 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
123
  HF_KEY = os.getenv('HF_KEY')
124
  API_URL = os.getenv('API_URL')
125
 
126
- # File type emojis for display
127
  FILE_EMOJIS = {
128
  "md": "๐Ÿ“",
129
  "mp3": "๐ŸŽต",
130
  "wav": "๐Ÿ”Š",
131
- "pdf": "๐Ÿ“„",
132
- "txt": "๐Ÿ“‹",
133
- "json": "๐Ÿ“Š",
134
- "csv": "๐Ÿ“ˆ"
 
 
 
135
  }
136
 
137
  # ─────────────────────────────────────────────────────────
138
- # 2. PERFORMANCE MONITORING & TIMING
139
  # ─────────────────────────────────────────────────────────
140
 
141
- class PerformanceTimer:
142
- """Context manager for timing operations with automatic logging."""
143
- def __init__(self, operation_name: str):
144
- self.operation_name = operation_name
145
- self.start_time = None
146
-
147
- def __enter__(self):
148
- self.start_time = time.time()
149
- return self
150
-
151
- def __exit__(self, exc_type, exc_val, exc_tb):
152
- if not exc_type: # Only log if no exception occurred
153
- duration = time.time() - self.start_time
154
- st.session_state['operation_timings'][self.operation_name] = duration
155
- st.session_state['performance_metrics'][self.operation_name].append(duration)
156
-
157
- def log_performance_metrics():
158
- """Display performance metrics in the sidebar."""
159
- st.sidebar.markdown("### โฑ๏ธ Performance Metrics")
160
-
161
- metrics = st.session_state['operation_timings']
162
- if metrics:
163
- total_time = sum(metrics.values())
164
- st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
165
-
166
- # Create timing breakdown
167
- for operation, duration in metrics.items():
168
- percentage = (duration / total_time) * 100
169
- st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")
170
-
171
- # Show timing history chart
172
- if st.session_state['performance_metrics']:
173
- history_data = []
174
- for op, times in st.session_state['performance_metrics'].items():
175
- if times: # Only show if we have timing data
176
- avg_time = sum(times) / len(times)
177
- history_data.append({"Operation": op, "Avg Time (s)": avg_time})
178
-
179
- if history_data: # Create chart if we have data
180
- st.sidebar.markdown("### ๐Ÿ“ˆ Timing History")
181
- chart_data = pd.DataFrame(history_data)
182
- st.sidebar.bar_chart(chart_data.set_index("Operation"))
183
-
184
- # ─────────────────────────────────────────────────────────
185
- # 3. OPTIMIZED AUDIO GENERATION
186
- # ─────────────────────────────────────────────────────────
187
-
188
- def clean_for_speech(text: str) -> str:
189
- """Clean up text for TTS output with enhanced cleaning."""
190
- with PerformanceTimer("text_cleaning"):
191
- # Remove markdown formatting
192
- text = re.sub(r'#+ ', '', text) # Remove headers
193
- text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # Clean links
194
- text = re.sub(r'[*_~`]', '', text) # Remove emphasis markers
195
-
196
- # Remove code blocks
197
- text = re.sub(r'```[\s\S]*?```', '', text)
198
- text = re.sub(r'`[^`]*`', '', text)
199
-
200
- # Clean up whitespace
201
- text = re.sub(r'\s+', ' ', text)
202
- text = text.replace("\n", " ")
203
- text = text.replace("</s>", " ")
204
-
205
- # Remove URLs
206
- text = re.sub(r'https?://\S+', '', text)
207
- text = re.sub(r'\(https?://[^\)]+\)', '', text)
208
-
209
- # Final cleanup
210
- text = text.strip()
211
- return text
212
-
213
- async def async_edge_tts_generate(
214
- text: str,
215
- voice: str,
216
- rate: int = 0,
217
- pitch: int = 0,
218
- file_format: str = "mp3"
219
- ) -> Tuple[Optional[str], float]:
220
- """Asynchronous TTS generation with performance tracking and caching."""
221
- with PerformanceTimer("tts_generation") as timer:
222
- # Clean and validate text
223
- text = clean_for_speech(text)
224
- if not text.strip():
225
- return None, 0
226
-
227
- # Check cache
228
- cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
229
- if cache_key in st.session_state['audio_cache']:
230
- return st.session_state['audio_cache'][cache_key], 0
231
-
232
- try:
233
- # Generate audio
234
- rate_str = f"{rate:+d}%"
235
- pitch_str = f"{pitch:+d}Hz"
236
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
237
-
238
- # Generate unique filename
239
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
240
- filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
241
-
242
- # Save audio file
243
- await communicate.save(filename)
244
-
245
- # Cache result
246
- st.session_state['audio_cache'][cache_key] = filename
247
- return filename, time.time() - timer.start_time
248
-
249
- except Exception as e:
250
- st.error(f"Error generating audio: {str(e)}")
251
- return None, 0
252
-
253
- async def async_save_qa_with_audio(
254
- question: str,
255
- answer: str,
256
- voice: Optional[str] = None
257
- ) -> Tuple[str, Optional[str], float, float]:
258
- """Asynchronously save Q&A to markdown and generate audio with timing."""
259
- voice = voice or st.session_state['tts_voice']
260
-
261
- with PerformanceTimer("qa_save") as timer:
262
- # Save markdown
263
- md_start = time.time()
264
- combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
265
- md_file = create_file(question, answer, "md")
266
- md_time = time.time() - md_start
267
-
268
- # Generate audio if enabled
269
- audio_file = None
270
- audio_time = 0
271
- if st.session_state['enable_audio']:
272
- audio_text = f"{question}\n\nAnswer: {answer}"
273
- audio_file, audio_time = await async_edge_tts_generate(
274
- audio_text,
275
- voice=voice,
276
- file_format=st.session_state['audio_format']
277
- )
278
-
279
- return md_file, audio_file, md_time, audio_time
280
-
281
- def create_download_link_with_cache(
282
- file_path: str,
283
- file_type: str = "mp3"
284
- ) -> str:
285
- """Create download link with caching and error handling."""
286
- with PerformanceTimer("download_link_generation"):
287
- # Check cache first
288
- cache_key = f"dl_{file_path}"
289
- if cache_key in st.session_state['download_link_cache']:
290
- return st.session_state['download_link_cache'][cache_key]
291
-
292
- try:
293
- with open(file_path, "rb") as f:
294
- b64 = base64.b64encode(f.read()).decode()
295
-
296
- # Generate appropriate link based on file type
297
- filename = os.path.basename(file_path)
298
- if file_type == "mp3":
299
- link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">๐ŸŽต Download {filename}</a>'
300
- elif file_type == "wav":
301
- link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">๐Ÿ”Š Download {filename}</a>'
302
- elif file_type == "md":
303
- link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">๐Ÿ“ Download {filename}</a>'
304
- else:
305
- link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">โฌ‡๏ธ Download {filename}</a>'
306
-
307
- # Cache and return
308
- st.session_state['download_link_cache'][cache_key] = link
309
- return link
310
-
311
- except Exception as e:
312
- st.error(f"Error creating download link: {str(e)}")
313
- return ""
314
-
315
-
316
-
317
- # ---
318
- def display_voice_tab():
319
- """Display voice input tab with TTS settings."""
320
- st.subheader("๐ŸŽค Voice Input")
321
-
322
- # Voice Settings Section
323
- st.markdown("### ๐ŸŽค Voice Settings")
324
- captionFemale='Top: ๐ŸŒธ **Aria** โ€“ ๐ŸŽถ **Jenny** โ€“ ๐ŸŒบ **Sonia** โ€“ ๐ŸŒŒ **Natasha** โ€“ ๐ŸŒท **Clara**'
325
- captionMale='Bottom: ๐ŸŒŸ **Guy** โ€“ ๐Ÿ› ๏ธ **Ryan** โ€“ ๐ŸŽป **William** โ€“ ๐ŸŒŸ **Liam**'
326
- st.sidebar.image('Group Picture - Voices.png', caption=captionFemale + ' - ' + captionMale)
327
- st.sidebar.markdown("""
328
-
329
- # ๐ŸŽ™๏ธ Voice Character Agent Selector ๐ŸŽญ
330
- 1. Female:
331
- - ๐ŸŒธ **Aria** โ€“ Female: ๐ŸŒŸ The voice of elegance and creativity, perfect for soothing storytelling or inspiring ideas.
332
- - ๐ŸŽถ **Jenny** โ€“ Female: ๐Ÿ’– Sweet and friendly, sheโ€™s the go-to for warm, conversational tones.
333
- - ๐ŸŒบ **Sonia** โ€“ Female: ๐Ÿ’ƒ Bold and confident, ideal for commanding attention and delivering with flair.
334
- - ๐ŸŒŒ **Natasha** โ€“ Female: โœจ Enigmatic and sophisticated, Natasha is great for a touch of mystery and charm.
335
- - ๐ŸŒท **Clara** โ€“ Female: ๐ŸŽ€ Cheerful and gentle, perfect for nurturing, empathetic conversations.
336
- ---
337
- 2. Male:
338
- - ๐ŸŒŸ **Guy** โ€“ Male: ๐ŸŽฉ Sophisticated and versatile, a natural fit for clear and authoritative delivery.
339
- - ๐Ÿ› ๏ธ **Ryan** โ€“ Male: ๐Ÿค Down-to-earth and approachable, ideal for friendly and casual exchanges.
340
- - ๐ŸŽป **William** โ€“ Male: ๐Ÿ“š Classic and refined, perfect for a scholarly or thoughtful tone.
341
- - ๐ŸŒŸ **Liam** โ€“ Male: โšก Energetic and upbeat, great for dynamic, engaging interactions.
342
-
343
- """)
344
- selected_voice = st.selectbox(
345
- "Select TTS Voice:",
346
- options=EDGE_TTS_VOICES,
347
- index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
348
- )
349
-
350
- # Audio Format Selection
351
- st.markdown("### ๐Ÿ”Š Audio Format")
352
- selected_format = st.radio(
353
- "Choose Audio Format:",
354
- options=["MP3", "WAV"],
355
- index=0
356
- )
357
-
358
- # Update session state if settings change
359
- if selected_voice != st.session_state['tts_voice']:
360
- st.session_state['tts_voice'] = selected_voice
361
- st.rerun()
362
- if selected_format.lower() != st.session_state['audio_format']:
363
- st.session_state['audio_format'] = selected_format.lower()
364
- st.rerun()
365
-
366
- # Text Input Area
367
- user_text = st.text_area("๐Ÿ’ฌ Message:", height=100)
368
- user_text = user_text.strip().replace('\n', ' ')
369
-
370
- # Send Button
371
- if st.button("๐Ÿ“จ Send"):
372
- process_voice_input(user_text)
373
-
374
- # Chat History
375
- st.subheader("๐Ÿ“œ Chat History")
376
- for c in st.session_state.chat_history:
377
- st.write("**You:**", c["user"])
378
- st.write("**Response:**", c["claude"])
379
-
380
- def display_arxiv_tab():
381
- """Display ArXiv search tab with options."""
382
- st.subheader("๐Ÿ” Query ArXiv")
383
- q = st.text_input("๐Ÿ” Query:", key="arxiv_query")
384
-
385
- # Options Section
386
- st.markdown("### ๐ŸŽ› Options")
387
- col1, col2 = st.columns(2)
388
-
389
- with col1:
390
- vocal_summary = st.checkbox("๐ŸŽ™ Short Audio", value=True,
391
- key="option_vocal_summary")
392
- extended_refs = st.checkbox("๐Ÿ“œ Long Refs", value=False,
393
- key="option_extended_refs")
394
-
395
- with col2:
396
- titles_summary = st.checkbox("๐Ÿ”– Titles Only", value=True,
397
- key="option_titles_summary")
398
- full_audio = st.checkbox("๐Ÿ“š Full Audio", value=False,
399
- key="option_full_audio")
400
-
401
- full_transcript = st.checkbox("๐Ÿงพ Full Transcript", value=False,
402
- key="option_full_transcript")
403
-
404
- if q and st.button("๐Ÿ” Run Search"):
405
- st.session_state.last_query = q
406
- result, timings = perform_ai_lookup(
407
- q,
408
- vocal_summary=vocal_summary,
409
- extended_refs=extended_refs,
410
- titles_summary=titles_summary,
411
- full_audio=full_audio
412
- )
413
-
414
- if full_transcript:
415
- create_file(q, result, "md")
416
-
417
- def display_media_tab():
418
- """Display media gallery tab with audio, images, and video."""
419
- st.header("๐Ÿ“ธ Media Gallery")
420
-
421
- # Create tabs for different media types
422
- tabs = st.tabs(["๐ŸŽต Audio", "๐Ÿ–ผ Images", "๐ŸŽฅ Video"])
423
-
424
- # Audio Files Tab
425
- with tabs[0]:
426
- st.subheader("๐ŸŽต Audio Files")
427
- audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
428
-
429
- if audio_files:
430
- for audio_file in audio_files:
431
- with st.expander(os.path.basename(audio_file)):
432
- st.audio(audio_file)
433
- ext = os.path.splitext(audio_file)[1].replace('.', '')
434
- dl_link = get_download_link(audio_file, file_type=ext)
435
- st.markdown(dl_link, unsafe_allow_html=True)
436
- else:
437
- st.write("No audio files found.")
438
-
439
- # Images Tab
440
- with tabs[1]:
441
- st.subheader("๐Ÿ–ผ Image Files")
442
- image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
443
-
444
- if image_files:
445
- cols = st.slider("Columns:", 1, 5, 3, key="cols_images")
446
- image_cols = st.columns(cols)
447
-
448
- for i, img_file in enumerate(image_files):
449
- with image_cols[i % cols]:
450
- try:
451
- img = Image.open(img_file)
452
- st.image(img, use_column_width=True)
453
- except Exception as e:
454
- st.error(f"Error loading image {img_file}: {str(e)}")
455
- else:
456
- st.write("No images found.")
457
-
458
- # Video Tab
459
- with tabs[2]:
460
- st.subheader("๐ŸŽฅ Video Files")
461
- video_files = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
462
-
463
- if video_files:
464
- for video_file in video_files:
465
- with st.expander(os.path.basename(video_file)):
466
- st.video(video_file)
467
- else:
468
- st.write("No videos found.")
469
-
470
- def display_editor_tab():
471
- """Display text editor tab with file management."""
472
- st.subheader("๐Ÿ“ Text Editor")
473
-
474
- # File Management Section
475
- st.markdown("### ๐Ÿ“‚ File Management")
476
-
477
- # File Selection
478
- md_files = glob.glob("*.md")
479
- selected_file = st.selectbox(
480
- "Select file to edit:",
481
- ["New File"] + md_files,
482
- key="file_selector"
483
- )
484
-
485
- # Edit Area
486
- if selected_file == "New File":
487
- new_filename = st.text_input("New filename (without extension):")
488
- file_content = st.text_area("Content:", height=300)
489
-
490
- if st.button("๐Ÿ’พ Save File"):
491
- if new_filename:
492
- try:
493
- with open(f"{new_filename}.md", 'w', encoding='utf-8') as f:
494
- f.write(file_content)
495
- st.success(f"File {new_filename}.md saved successfully!")
496
- st.session_state.should_rerun = True
497
- except Exception as e:
498
- st.error(f"Error saving file: {str(e)}")
499
- else:
500
- st.warning("Please enter a filename.")
501
- else:
502
- try:
503
- # Load existing file content
504
- with open(selected_file, 'r', encoding='utf-8') as f:
505
- file_content = f.read()
506
-
507
- # Edit existing file
508
- edited_content = st.text_area(
509
- "Edit content:",
510
- value=file_content,
511
- height=300
512
- )
513
-
514
- col1, col2 = st.columns(2)
515
- with col1:
516
- if st.button("๐Ÿ’พ Save Changes"):
517
- try:
518
- with open(selected_file, 'w', encoding='utf-8') as f:
519
- f.write(edited_content)
520
- st.success("Changes saved successfully!")
521
- except Exception as e:
522
- st.error(f"Error saving changes: {str(e)}")
523
-
524
- with col2:
525
- if st.button("๐Ÿ—‘ Delete File"):
526
- try:
527
- os.remove(selected_file)
528
- st.success(f"File {selected_file} deleted successfully!")
529
- st.session_state.should_rerun = True
530
- except Exception as e:
531
- st.error(f"Error deleting file: {str(e)}")
532
-
533
- except Exception as e:
534
- st.error(f"Error loading file {selected_file}: {str(e)}")
535
-
536
- def display_settings_tab():
537
- """Display application settings tab."""
538
- st.subheader("โš™๏ธ Settings")
539
-
540
- # General Settings
541
- st.markdown("### ๐Ÿ”ง General Settings")
542
-
543
- # Theme Selection
544
- theme = st.selectbox(
545
- "Color Theme:",
546
- ["Dark", "Light", "Custom"],
547
- index=0
548
- )
549
-
550
- if theme == "Custom":
551
- st.color_picker("Primary Color:", "#1E1E1E")
552
- st.color_picker("Secondary Color:", "#2D2D2D")
553
-
554
- # Performance Settings
555
- st.markdown("### โšก Performance Settings")
556
-
557
- # Cache Settings
558
- cache_size = st.slider(
559
- "Maximum Cache Size (MB):",
560
- 0, 1000, 100
561
- )
562
-
563
- if st.button("Clear Cache"):
564
- st.session_state['audio_cache'] = {}
565
- st.session_state['paper_cache'] = {}
566
- st.session_state['download_link_cache'] = {}
567
- st.success("Cache cleared successfully!")
568
-
569
- # API Settings
570
- st.markdown("### ๐Ÿ”‘ API Settings")
571
-
572
- # Show/hide API keys
573
- show_keys = st.checkbox("Show API Keys")
574
- if show_keys:
575
- st.text_input("OpenAI API Key:", value=openai_api_key)
576
- st.text_input("Anthropic API Key:", value=anthropic_key)
577
-
578
- # Save Settings
579
- if st.button("๐Ÿ’พ Save Settings"):
580
- st.success("Settings saved successfully!")
581
- st.session_state.should_rerun = True
582
-
583
-
584
-
585
- def get_download_link(file: str, file_type: str = "zip") -> str:
586
- """
587
- Convert a file to base64 and return an HTML link for download.
588
- Supports multiple file types with appropriate MIME types.
589
- """
590
- try:
591
- with open(file, "rb") as f:
592
- b64 = base64.b64encode(f.read()).decode()
593
-
594
- # Get filename for display
595
- filename = os.path.basename(file)
596
-
597
- # Define MIME types and emoji icons for different file types
598
- mime_types = {
599
- "zip": ("application/zip", "๐Ÿ“‚"),
600
- "mp3": ("audio/mpeg", "๐ŸŽต"),
601
- "wav": ("audio/wav", "๐Ÿ”Š"),
602
- "md": ("text/markdown", "๐Ÿ“"),
603
- "pdf": ("application/pdf", "๐Ÿ“„"),
604
- "txt": ("text/plain", "๐Ÿ“‹"),
605
- "json": ("application/json", "๐Ÿ“Š"),
606
- "csv": ("text/csv", "๐Ÿ“ˆ"),
607
- "png": ("image/png", "๐Ÿ–ผ"),
608
- "jpg": ("image/jpeg", "๐Ÿ–ผ"),
609
- "jpeg": ("image/jpeg", "๐Ÿ–ผ")
610
- }
611
-
612
- # Get MIME type and emoji for file
613
- mime_type, emoji = mime_types.get(
614
- file_type.lower(),
615
- ("application/octet-stream", "โฌ‡๏ธ")
616
- )
617
-
618
- # Create download link with appropriate MIME type
619
- link = f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{emoji} Download {filename}</a>'
620
-
621
- return link
622
-
623
- except FileNotFoundError:
624
- return f"<p style='color: red'>โŒ File not found: {file}</p>"
625
- except Exception as e:
626
- return f"<p style='color: red'>โŒ Error creating download link: {str(e)}</p>"
627
-
628
- def play_and_download_audio(file_path: str, file_type: str = "mp3"):
629
- """
630
- Display audio player and download link for audio file.
631
- Includes error handling and file validation.
632
- """
633
- if not file_path:
634
- st.warning("No audio file provided.")
635
- return
636
-
637
- if not os.path.exists(file_path):
638
- st.error(f"Audio file not found: {file_path}")
639
- return
640
-
641
- try:
642
- # Display audio player
643
- st.audio(file_path)
644
-
645
- # Create and display download link
646
- dl_link = get_download_link(file_path, file_type=file_type)
647
- st.markdown(dl_link, unsafe_allow_html=True)
648
-
649
- except Exception as e:
650
- st.error(f"Error playing audio: {str(e)}")
651
-
652
- def get_file_info(file_path: str) -> dict:
653
- """
654
- Get detailed information about a file.
655
- Returns dictionary with size, modification time, and other metadata.
656
- """
657
- try:
658
- stats = os.stat(file_path)
659
-
660
- # Get basic file information
661
- info = {
662
- 'name': os.path.basename(file_path),
663
- 'path': file_path,
664
- 'size': stats.st_size,
665
- 'modified': datetime.fromtimestamp(stats.st_mtime),
666
- 'created': datetime.fromtimestamp(stats.st_ctime),
667
- 'type': os.path.splitext(file_path)[1].lower().strip('.'),
668
- }
669
-
670
- # Add formatted size
671
- if info['size'] < 1024:
672
- info['size_fmt'] = f"{info['size']} B"
673
- elif info['size'] < 1024 * 1024:
674
- info['size_fmt'] = f"{info['size']/1024:.1f} KB"
675
- else:
676
- info['size_fmt'] = f"{info['size']/(1024*1024):.1f} MB"
677
-
678
- # Add formatted dates
679
- info['modified_fmt'] = info['modified'].strftime("%Y-%m-%d %H:%M:%S")
680
- info['created_fmt'] = info['created'].strftime("%Y-%m-%d %H:%M:%S")
681
-
682
- return info
683
-
684
- except Exception as e:
685
- st.error(f"Error getting file info: {str(e)}")
686
- return None
687
-
688
- def sanitize_filename(filename: str) -> str:
689
- """
690
- Clean and sanitize a filename to ensure it's safe for filesystem.
691
- Removes/replaces unsafe characters and enforces length limits.
692
- """
693
- # Remove or replace unsafe characters
694
- filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
695
-
696
- # Remove leading/trailing spaces and dots
697
- filename = filename.strip('. ')
698
-
699
- # Limit length (reserving space for extension)
700
- max_length = 255
701
- name, ext = os.path.splitext(filename)
702
- if len(filename) > max_length:
703
- return name[:(max_length-len(ext))] + ext
704
-
705
- return filename
706
-
707
- def create_file_with_metadata(filename: str, content: str, metadata: dict = None):
708
- """
709
- Create a file with optional metadata header.
710
- Useful for storing additional information with files.
711
- """
712
- try:
713
- # Sanitize filename
714
- safe_filename = sanitize_filename(filename)
715
-
716
- # Ensure directory exists
717
- os.makedirs(os.path.dirname(safe_filename) or '.', exist_ok=True)
718
-
719
- # Prepare content with metadata
720
- if metadata:
721
- metadata_str = json.dumps(metadata, indent=2)
722
- full_content = f"""---
723
- {metadata_str}
724
- ---
725
- {content}"""
726
- else:
727
- full_content = content
728
-
729
- # Write file
730
- with open(safe_filename, 'w', encoding='utf-8') as f:
731
- f.write(full_content)
732
-
733
- return safe_filename
734
-
735
- except Exception as e:
736
- st.error(f"Error creating file: {str(e)}")
737
- return None
738
-
739
- def read_file_with_metadata(filename: str) -> tuple:
740
- """
741
- Read a file and extract any metadata header.
742
- Returns tuple of (content, metadata).
743
- """
744
- try:
745
- with open(filename, 'r', encoding='utf-8') as f:
746
- content = f.read()
747
-
748
- # Check for metadata section
749
- if content.startswith('---\n'):
750
- # Find end of metadata section
751
- end_meta = content.find('\n---\n', 4)
752
- if end_meta != -1:
753
- try:
754
- metadata = json.loads(content[4:end_meta])
755
- content = content[end_meta+5:]
756
- return content, metadata
757
- except json.JSONDecodeError:
758
- pass
759
-
760
- return content, None
761
-
762
- except Exception as e:
763
- st.error(f"Error reading file: {str(e)}")
764
- return None, None
765
-
766
- def archive_files(file_paths: list, archive_name: str = None) -> str:
767
- """
768
- Create a zip archive containing the specified files.
769
- Returns path to created archive.
770
- """
771
- try:
772
- # Generate archive name if not provided
773
- if not archive_name:
774
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
775
- archive_name = f"archive_{timestamp}.zip"
776
-
777
- # Create zip file
778
- with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf:
779
- for file_path in file_paths:
780
- if os.path.exists(file_path):
781
- zf.write(file_path, os.path.basename(file_path))
782
-
783
- return archive_name
784
-
785
- except Exception as e:
786
- st.error(f"Error creating archive: {str(e)}")
787
- return None
788
-
789
- def list_files_by_type(directory: str = ".",
790
- extensions: list = None,
791
- recursive: bool = False) -> dict:
792
- """
793
- List files in directory filtered by extension.
794
- Returns dict grouping files by type.
795
- """
796
- try:
797
- if extensions is None:
798
- extensions = ['md', 'mp3', 'wav', 'pdf', 'txt', 'json', 'csv']
799
-
800
- files = {}
801
- pattern = "**/*" if recursive else "*"
802
-
803
- for ext in extensions:
804
- glob_pattern = f"{pattern}.{ext}"
805
- matches = glob.glob(os.path.join(directory, glob_pattern),
806
- recursive=recursive)
807
- if matches:
808
- files[ext] = matches
809
-
810
- return files
811
-
812
- except Exception as e:
813
- st.error(f"Error listing files: {str(e)}")
814
- return {}
815
-
816
-
817
-
818
-
819
-
820
-
821
- def get_central_time() -> datetime:
822
  """Get current time in US Central timezone."""
823
  central = pytz.timezone('US/Central')
824
  return datetime.now(central)
825
 
826
- def format_timestamp_prefix() -> str:
827
  """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM."""
828
  ct = get_central_time()
829
  return ct.strftime("%m_%d_%y_%I_%M_%p")
830
 
831
- def get_formatted_time(dt: datetime = None,
832
- timezone: str = 'US/Central',
833
- include_timezone: bool = True,
834
- include_seconds: bool = False) -> str:
835
- """
836
- Format a datetime object with specified options.
837
- If no datetime is provided, uses current time.
838
- """
839
- if dt is None:
840
- tz = pytz.timezone(timezone)
841
- dt = datetime.now(tz)
842
- elif dt.tzinfo is None:
843
- tz = pytz.timezone(timezone)
844
- dt = tz.localize(dt)
845
-
846
- format_string = "%Y-%m-%d %I:%M"
847
- if include_seconds:
848
- format_string += ":%S"
849
- format_string += " %p"
850
- if include_timezone:
851
- format_string += " %Z"
852
-
853
- return dt.strftime(format_string)
854
-
855
- def parse_timestamp(timestamp_str: str,
856
- timezone: str = 'US/Central') -> Optional[datetime]:
857
- """
858
- Parse a timestamp string in various formats.
859
- Returns timezone-aware datetime object.
860
- """
861
- try:
862
- # Try different format patterns
863
- patterns = [
864
- "%m_%d_%y_%I_%M_%p", # Standard app format
865
- "%Y-%m-%d %I:%M %p", # Common 12-hour format
866
- "%Y-%m-%d %H:%M", # 24-hour format
867
- "%m/%d/%y %I:%M %p", # US date format
868
- "%d/%m/%y %I:%M %p" # European date format
869
- ]
870
-
871
- dt = None
872
- for pattern in patterns:
873
- try:
874
- dt = datetime.strptime(timestamp_str, pattern)
875
- break
876
- except ValueError:
877
- continue
878
-
879
- if dt is None:
880
- raise ValueError(f"Could not parse timestamp: {timestamp_str}")
881
-
882
- # Add timezone if not present
883
- if dt.tzinfo is None:
884
- tz = pytz.timezone(timezone)
885
- dt = tz.localize(dt)
886
-
887
- return dt
888
-
889
- except Exception as e:
890
- st.error(f"Error parsing timestamp: {str(e)}")
891
- return None
892
-
893
- def get_time_ago(dt: datetime) -> str:
894
- """
895
- Convert datetime to human-readable "time ago" format.
896
- E.g., "2 hours ago", "3 days ago", etc.
897
- """
898
- try:
899
- now = datetime.now(dt.tzinfo)
900
- diff = now - dt
901
-
902
- seconds = diff.total_seconds()
903
-
904
- if seconds < 60:
905
- return "just now"
906
- elif seconds < 3600:
907
- minutes = int(seconds / 60)
908
- return f"{minutes} minute{'s' if minutes != 1 else ''} ago"
909
- elif seconds < 86400:
910
- hours = int(seconds / 3600)
911
- return f"{hours} hour{'s' if hours != 1 else ''} ago"
912
- elif seconds < 604800:
913
- days = int(seconds / 86400)
914
- return f"{days} day{'s' if days != 1 else ''} ago"
915
- elif seconds < 2592000:
916
- weeks = int(seconds / 604800)
917
- return f"{weeks} week{'s' if weeks != 1 else ''} ago"
918
- elif seconds < 31536000:
919
- months = int(seconds / 2592000)
920
- return f"{months} month{'s' if months != 1 else ''} ago"
921
- else:
922
- years = int(seconds / 31536000)
923
- return f"{years} year{'s' if years != 1 else ''} ago"
924
-
925
- except Exception as e:
926
- st.error(f"Error calculating time ago: {str(e)}")
927
- return "unknown time ago"
928
-
929
- def format_duration(seconds: float) -> str:
930
- """
931
- Format a duration in seconds to human-readable string.
932
- E.g., "2m 30s", "1h 15m", etc.
933
- """
934
- try:
935
- if seconds < 0:
936
- return "invalid duration"
937
-
938
- # Handle special cases
939
- if seconds < 1:
940
- return f"{seconds * 1000:.0f}ms"
941
- if seconds < 60:
942
- return f"{seconds:.1f}s"
943
-
944
- # Calculate hours, minutes, seconds
945
- hours = int(seconds // 3600)
946
- minutes = int((seconds % 3600) // 60)
947
- secs = seconds % 60
948
-
949
- # Build duration string
950
- parts = []
951
- if hours > 0:
952
- parts.append(f"{hours}h")
953
- if minutes > 0:
954
- parts.append(f"{minutes}m")
955
- if secs > 0 and hours == 0: # Only show seconds if less than an hour
956
- parts.append(f"{secs:.1f}s")
957
-
958
- return " ".join(parts)
959
-
960
- except Exception as e:
961
- st.error(f"Error formatting duration: {str(e)}")
962
- return "unknown duration"
963
-
964
-
965
-
966
-
967
-
968
-
969
- async def create_paper_audio_files(papers: List[Dict], input_question: str):
970
- """Generate audio files for papers asynchronously with improved naming."""
971
- with PerformanceTimer("paper_audio_generation"):
972
- tasks = []
973
- for paper in papers:
974
- try:
975
- # Prepare text for audio generation
976
- audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
977
- audio_text = clean_for_speech(audio_text)
978
-
979
- # Create sanitized title for filename
980
- safe_title = paper['title'].lower()
981
- safe_title = re.sub(r'[^\w\s-]', '', safe_title) # Remove special chars
982
- safe_title = re.sub(r'\s+', '_', safe_title) # Replace spaces with underscores
983
- safe_title = safe_title[:100] # Limit length
984
-
985
- # Generate timestamp
986
- timestamp = format_timestamp_prefix()
987
-
988
- # Create filename with timestamp and title
989
- filename = f"{timestamp}_{safe_title}.{st.session_state['audio_format']}"
990
-
991
- # Create task for audio generation
992
- async def generate_audio(text, filename):
993
- rate_str = "0%"
994
- pitch_str = "0Hz"
995
- communicate = edge_tts.Communicate(text, st.session_state['tts_voice'])
996
- await communicate.save(filename)
997
- return filename
998
-
999
- task = generate_audio(audio_text, filename)
1000
- tasks.append((paper, task, filename))
1001
-
1002
- except Exception as e:
1003
- st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}")
1004
- continue
1005
-
1006
- # Process all audio generation tasks concurrently
1007
- for paper, task, filename in tasks:
1008
- try:
1009
- audio_file = await task
1010
- if audio_file:
1011
- paper['full_audio'] = audio_file
1012
- if st.session_state['enable_download']:
1013
- paper['download_base64'] = create_download_link_with_cache(
1014
- audio_file,
1015
- st.session_state['audio_format']
1016
- )
1017
- except Exception as e:
1018
- st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
1019
- paper['full_audio'] = None
1020
- paper['download_base64'] = ''
1021
-
1022
-
1023
-
1024
-
1025
-
1026
-
1027
-
1028
-
1029
-
1030
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1031
- # 4. PAPER PROCESSING & DISPLAY
1032
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1033
-
1034
- def parse_arxiv_refs(ref_text: str) -> List[Dict[str, str]]:
1035
- """Parse arxiv references with improved error handling."""
1036
- if not ref_text:
1037
- return []
1038
-
1039
- with PerformanceTimer("parse_refs"):
1040
- results = []
1041
- current_paper = {}
1042
- lines = ref_text.split('\n')
1043
-
1044
- for i, line in enumerate(lines):
1045
- try:
1046
- if line.count('|') == 2:
1047
- # Found a new paper line
1048
- if current_paper:
1049
- results.append(current_paper)
1050
- if len(results) >= 20: # Limit to 20 papers
1051
- break
1052
-
1053
- # Parse header parts
1054
- header_parts = line.strip('* ').split('|')
1055
- date = header_parts[0].strip()
1056
- title = header_parts[1].strip()
1057
- url_match = re.search(r'(https://arxiv.org/\S+)', line)
1058
- url = url_match.group(1) if url_match else f"paper_{len(results)}"
1059
-
1060
- current_paper = {
1061
- 'date': date,
1062
- 'title': title,
1063
- 'url': url,
1064
- 'authors': '',
1065
- 'summary': '',
1066
- 'full_audio': None,
1067
- 'download_base64': '',
1068
- }
1069
-
1070
- elif current_paper:
1071
- # Add content to current paper
1072
- line = line.strip('* ')
1073
- if not current_paper['authors']:
1074
- current_paper['authors'] = line
1075
- else:
1076
- if current_paper['summary']:
1077
- current_paper['summary'] += ' ' + line
1078
- else:
1079
- current_paper['summary'] = line
1080
-
1081
- except Exception as e:
1082
- st.warning(f"Error parsing line {i}: {str(e)}")
1083
- continue
1084
-
1085
- # Add final paper if exists
1086
- if current_paper:
1087
- results.append(current_paper)
1088
-
1089
- return results[:20] # Ensure we don't exceed 20 papers
1090
-
1091
- async def create_paper_audio_files(papers: List[Dict], input_question: str):
1092
- """Generate audio files for papers asynchronously with progress tracking."""
1093
- with PerformanceTimer("paper_audio_generation"):
1094
- tasks = []
1095
- for paper in papers:
1096
- try:
1097
- # Prepare text for audio generation
1098
- audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
1099
- audio_text = clean_for_speech(audio_text)
1100
-
1101
- # Create task for audio generation
1102
- task = async_edge_tts_generate(
1103
- audio_text,
1104
- voice=st.session_state['tts_voice'],
1105
- file_format=st.session_state['audio_format']
1106
- )
1107
- tasks.append((paper, task))
1108
-
1109
- except Exception as e:
1110
- st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}")
1111
- continue
1112
-
1113
- # Process all audio generation tasks concurrently
1114
- for paper, task in tasks:
1115
- try:
1116
- audio_file, gen_time = await task
1117
- if audio_file:
1118
- paper['full_audio'] = audio_file
1119
- if st.session_state['enable_download']:
1120
- paper['download_base64'] = create_download_link_with_cache(
1121
- audio_file,
1122
- st.session_state['audio_format']
1123
- )
1124
- except Exception as e:
1125
- st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
1126
- paper['full_audio'] = None
1127
- paper['download_base64'] = ''
1128
-
1129
-
1130
  def initialize_marquee_settings():
1131
- """Initialize default marquee settings if not present in session state."""
1132
  if 'marquee_settings' not in st.session_state:
1133
  st.session_state['marquee_settings'] = {
1134
  "background": "#1E1E1E",
@@ -1140,50 +164,24 @@ def initialize_marquee_settings():
1140
  }
1141
 
1142
  def get_marquee_settings():
1143
- """Get current marquee settings, initializing if needed."""
1144
  initialize_marquee_settings()
1145
  return st.session_state['marquee_settings']
1146
 
1147
  def update_marquee_settings_ui():
1148
- """Add color pickers & sliders for marquee configuration in sidebar."""
1149
  st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
1150
-
1151
- # Create two columns for settings
1152
  cols = st.sidebar.columns(2)
1153
-
1154
- # Column 1: Color settings
1155
  with cols[0]:
1156
- # Background color picker
1157
- bg_color = st.color_picker(
1158
- "๐ŸŽจ Background",
1159
- st.session_state['marquee_settings']["background"],
1160
- key="bg_color_picker"
1161
- )
1162
-
1163
- # Text color picker
1164
- text_color = st.color_picker(
1165
- "โœ๏ธ Text Color",
1166
- st.session_state['marquee_settings']["color"],
1167
- key="text_color_picker"
1168
- )
1169
-
1170
- # Column 2: Size and speed settings
1171
  with cols[1]:
1172
- # Font size slider
1173
- font_size = st.slider(
1174
- "๐Ÿ“ Font Size",
1175
- 10, 24, 14,
1176
- key="font_size_slider"
1177
- )
1178
-
1179
- # Animation duration slider
1180
- duration = st.slider(
1181
- "โฑ๏ธ Animation Speed",
1182
- 1, 20, 20,
1183
- key="duration_slider"
1184
- )
1185
-
1186
- # Update session state with new settings
1187
  st.session_state['marquee_settings'].update({
1188
  "background": bg_color,
1189
  "color": text_color,
@@ -1191,254 +189,51 @@ def update_marquee_settings_ui():
1191
  "animationDuration": f"{duration}s"
1192
  })
1193
 
1194
- def display_marquee(text: str, settings: dict, key_suffix: str = ""):
1195
- """Show marquee text with specified style settings."""
1196
- # Truncate long text to prevent performance issues
1197
  truncated_text = text[:280] + "..." if len(text) > 280 else text
1198
-
1199
- # Display the marquee
1200
  streamlit_marquee(
1201
  content=truncated_text,
1202
  **settings,
1203
  key=f"marquee_{key_suffix}"
1204
  )
1205
-
1206
- # Add spacing after marquee
1207
  st.write("")
1208
 
1209
- def create_paper_links_md(papers: list) -> str:
1210
- """Creates a minimal markdown file linking to each paper's arxiv URL."""
1211
- lines = ["# Paper Links\n"]
1212
- for i, p in enumerate(papers, start=1):
1213
- lines.append(f"{i}. **{p['title']}** โ€” [Arxiv]({p['url']})")
1214
- return "\n".join(lines)
1215
-
1216
- def apply_custom_styling():
1217
- """Apply custom CSS styling to the app."""
1218
- st.markdown("""
1219
- <style>
1220
- .main {
1221
- background: linear-gradient(to right, #1a1a1a, #2d2d2d);
1222
- color: #fff;
1223
- }
1224
- .stMarkdown {
1225
- font-family: 'Helvetica Neue', sans-serif;
1226
- }
1227
- .stButton>button {
1228
- margin-right: 0.5rem;
1229
- }
1230
- .streamlit-marquee {
1231
- margin: 1rem 0;
1232
- border-radius: 4px;
1233
- }
1234
- .st-emotion-cache-1y4p8pa {
1235
- padding: 1rem;
1236
- }
1237
- </style>
1238
- """, unsafe_allow_html=True)
1239
-
1240
- def display_performance_metrics(timings: dict):
1241
- """Display performance metrics with visualizations."""
1242
- st.sidebar.markdown("### โฑ๏ธ Performance Metrics")
1243
-
1244
- # Calculate total time
1245
- total_time = sum(timings.values())
1246
- st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
1247
-
1248
- # Show breakdown of operations
1249
- st.sidebar.markdown("#### Operation Breakdown")
1250
- for operation, duration in timings.items():
1251
- percentage = (duration / total_time) * 100 if total_time > 0 else 0
1252
- st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")
1253
-
1254
- # Create a progress bar for visual representation
1255
- st.sidebar.progress(percentage / 100)
1256
-
1257
-
1258
-
1259
-
1260
- def display_papers(papers: List[Dict], marquee_settings: Dict):
1261
- """Display paper information with enhanced visualization."""
1262
- with PerformanceTimer("paper_display"):
1263
- st.write("## ๐Ÿ“š Research Papers")
1264
-
1265
- # Create tabs for different views
1266
- tab1, tab2 = st.tabs(["๐Ÿ“‹ List View", "๐Ÿ“Š Grid View"])
1267
-
1268
- with tab1:
1269
- for i, paper in enumerate(papers, start=1):
1270
- # Create marquee for paper title
1271
- marquee_text = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors'][:120]}"
1272
- display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
1273
-
1274
- # Paper details expander
1275
- with st.expander(f"{i}. ๐Ÿ“„ {paper['title']}", expanded=True):
1276
- # Create PDF link
1277
- pdf_url = paper['url'].replace('/abs/', '/pdf/')
1278
-
1279
- # Display paper information
1280
- st.markdown(f"""
1281
- **Date:** {paper['date']}
1282
- **Title:** {paper['title']}
1283
- **Links:** ๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})
1284
- """)
1285
- st.markdown(f"**Authors:** {paper['authors']}")
1286
- st.markdown(f"**Summary:** {paper['summary']}")
1287
-
1288
- # Audio player and download if available
1289
- if paper.get('full_audio'):
1290
- st.write("๐ŸŽง Paper Audio Summary")
1291
- st.audio(paper['full_audio'])
1292
- if paper['download_base64']:
1293
- st.markdown(paper['download_base64'], unsafe_allow_html=True)
1294
-
1295
- with tab2:
1296
- # Create a grid layout of papers
1297
- cols = st.columns(3)
1298
- for i, paper in enumerate(papers):
1299
- with cols[i % 3]:
1300
- st.markdown(f"""
1301
- ### ๐Ÿ“„ {paper['title'][:50]}...
1302
- **Date:** {paper['date']}
1303
- [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')})
1304
- """)
1305
- if paper.get('full_audio'):
1306
- st.audio(paper['full_audio'])
1307
-
1308
- def display_papers_in_sidebar(papers: List[Dict]):
1309
- """Display paper listing in sidebar with lazy loading."""
1310
- with PerformanceTimer("sidebar_display"):
1311
- st.sidebar.title("๐Ÿ“š Papers Overview")
1312
-
1313
- # Add filter options
1314
- filter_date = st.sidebar.date_input("Filter by date:", None)
1315
- search_term = st.sidebar.text_input("Search papers:", "")
1316
-
1317
- # Filter papers based on criteria
1318
- filtered_papers = papers
1319
- if filter_date:
1320
- filtered_papers = [p for p in filtered_papers
1321
- if filter_date.strftime("%Y-%m-%d") in p['date']]
1322
- if search_term:
1323
- search_lower = search_term.lower()
1324
- filtered_papers = [p for p in filtered_papers
1325
- if search_lower in p['title'].lower()
1326
- or search_lower in p['authors'].lower()]
1327
-
1328
- # Display filtered papers
1329
- for i, paper in enumerate(filtered_papers, start=1):
1330
- paper_key = f"paper_{paper['url']}"
1331
- if paper_key not in st.session_state:
1332
- st.session_state[paper_key] = False
1333
-
1334
- with st.sidebar.expander(f"{i}. {paper['title'][:50]}...", expanded=False):
1335
- # Paper metadata
1336
- st.markdown(f"**Date:** {paper['date']}")
1337
-
1338
- # Links
1339
- pdf_url = paper['url'].replace('/abs/', '/pdf/')
1340
- st.markdown(f"๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})")
1341
-
1342
- # Preview of authors and summary
1343
- st.markdown(f"**Authors:** {paper['authors'][:100]}...")
1344
- if paper['summary']:
1345
- st.markdown(f"**Summary:** {paper['summary'][:200]}...")
1346
-
1347
- # Audio controls
1348
- if paper['full_audio']:
1349
- if st.button("๐ŸŽต Load Audio", key=f"btn_{paper_key}"):
1350
- st.session_state[paper_key] = True
1351
-
1352
- if st.session_state[paper_key]:
1353
- st.audio(paper['full_audio'])
1354
- if paper['download_base64']:
1355
- st.markdown(paper['download_base64'], unsafe_allow_html=True)
1356
-
1357
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1358
- # 5. FILE MANAGEMENT & HISTORY
1359
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1360
-
1361
- def create_file(prompt: str, response: str, file_type: str = "md") -> str:
1362
- """Create a file with proper naming and error handling."""
1363
- with PerformanceTimer("file_creation"):
1364
- try:
1365
- # Generate filename
1366
- filename = generate_filename(prompt.strip(), response.strip(), file_type)
1367
-
1368
- # Ensure directory exists
1369
- os.makedirs("generated_files", exist_ok=True)
1370
- filepath = os.path.join("generated_files", filename)
1371
-
1372
- # Write content
1373
- with open(filepath, 'w', encoding='utf-8') as f:
1374
- if file_type == "md":
1375
- f.write(f"# Query\n{prompt}\n\n# Response\n{response}")
1376
- else:
1377
- f.write(f"{prompt}\n\n{response}")
1378
-
1379
- return filepath
1380
-
1381
- except Exception as e:
1382
- st.error(f"Error creating file: {str(e)}")
1383
- return ""
1384
-
1385
- def get_high_info_terms(text: str, top_n: int = 10) -> List[str]:
1386
- """Extract most informative terms from text."""
1387
- # Common English stop words to filter out
1388
- stop_words = set([
1389
- 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
1390
- 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over',
1391
- 'after', 'the', 'this', 'that', 'these', 'those', 'what', 'which'
1392
- ])
1393
-
1394
- # Extract words and bi-grams
1395
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
1396
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
1397
-
1398
- # Combine and filter terms
1399
  combined = words + bi_grams
1400
- filtered = [term for term in combined
1401
- if term not in stop_words
1402
- and len(term.split()) <= 2
1403
- and len(term) > 3]
1404
-
1405
- # Count and return top terms
1406
  counter = Counter(filtered)
1407
  return [term for term, freq in counter.most_common(top_n)]
1408
 
1409
  def clean_text_for_filename(text: str) -> str:
1410
- """Clean text for use in filenames."""
1411
- # Remove special characters
1412
  text = text.lower()
1413
  text = re.sub(r'[^\w\s-]', '', text)
1414
-
1415
- # Remove common unhelpful words
1416
- stop_words = set([
1417
- 'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which',
1418
- 'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai',
1419
- 'library', 'function', 'method', 'class', 'object', 'variable'
1420
- ])
1421
-
1422
  words = text.split()
1423
- filtered = [w for w in words if len(w) > 3 and w not in stop_words]
1424
-
 
1425
  return '_'.join(filtered)[:200]
1426
 
1427
- def generate_filename(prompt: str, response: str, file_type: str = "md",
1428
- max_length: int = 200) -> str:
1429
- """Generate descriptive filename from content."""
1430
- # Get timestamp prefix
 
 
 
 
1431
  prefix = format_timestamp_prefix() + "_"
1432
-
1433
- # Extract informative terms
1434
- combined_text = (prompt + " " + response)[:500]
1435
- info_terms = get_high_info_terms(combined_text, top_n=5)
1436
-
1437
- # Get content snippet
1438
  snippet = (prompt[:40] + " " + response[:40]).strip()
1439
  snippet_cleaned = clean_text_for_filename(snippet)
1440
 
1441
- # Combine and deduplicate parts
1442
  name_parts = info_terms + [snippet_cleaned]
1443
  seen = set()
1444
  unique_parts = []
@@ -1447,7 +242,6 @@ def generate_filename(prompt: str, response: str, file_type: str = "md",
1447
  seen.add(part)
1448
  unique_parts.append(part)
1449
 
1450
- # Create final filename
1451
  full_name = '_'.join(unique_parts).strip('_')
1452
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
1453
  if len(full_name) > leftover_chars:
@@ -1455,415 +249,579 @@ def generate_filename(prompt: str, response: str, file_type: str = "md",
1455
 
1456
  return f"{prefix}{full_name}.{file_type}"
1457
 
1458
- def create_zip_of_files(md_files: List[str], mp3_files: List[str],
1459
- wav_files: List[str], input_question: str) -> Optional[str]:
1460
- """Create zip archive of files with optimization."""
1461
- with PerformanceTimer("zip_creation"):
1462
- # Filter out readme and empty files
1463
- md_files = [f for f in md_files
1464
- if os.path.basename(f).lower() != 'readme.md'
1465
- and os.path.getsize(f) > 0]
1466
-
1467
- all_files = md_files + mp3_files + wav_files
1468
- if not all_files:
1469
- return None
1470
-
1471
- try:
1472
- # Generate zip name
1473
- all_content = []
1474
- for f in all_files:
1475
- if f.endswith('.md'):
1476
- with open(f, 'r', encoding='utf-8') as file:
1477
- all_content.append(file.read())
1478
- elif f.endswith(('.mp3', '.wav')):
1479
- basename = os.path.splitext(os.path.basename(f))[0]
1480
- all_content.append(basename.replace('_', ' '))
1481
-
1482
- all_content.append(input_question)
1483
- combined_content = " ".join(all_content)
1484
- info_terms = get_high_info_terms(combined_content, top_n=10)
1485
-
1486
- timestamp = format_timestamp_prefix()
1487
- name_text = '-'.join(term for term in info_terms[:5])
1488
- zip_name = f"archive_{timestamp}_{name_text[:50]}.zip"
1489
-
1490
- # Create zip file
1491
- with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as z:
1492
- for f in all_files:
1493
- z.write(f, os.path.basename(f))
1494
-
1495
- return zip_name
1496
-
1497
- except Exception as e:
1498
- st.error(f"Error creating zip archive: {str(e)}")
1499
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1500
 
1501
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1502
- # 6. OPTIMIZED AI LOOKUP & PROCESSING
1503
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1504
 
1505
- def perform_ai_lookup(q: str, vocal_summary: bool = True,
1506
- extended_refs: bool = False,
1507
- titles_summary: bool = True,
1508
- full_audio: bool = False) -> Tuple[str, Dict[str, float]]:
1509
- """Main AI lookup routine with performance optimization."""
1510
- with PerformanceTimer("total_lookup") as total_timer:
1511
- timings = {}
1512
-
1513
- # Add operation controls if not present
1514
- if 'operation_controls' not in st.session_state:
1515
- st.sidebar.markdown("### ๐Ÿ”ง Operation Controls")
1516
- st.session_state['enable_claude'] = st.sidebar.checkbox(
1517
- "Enable Claude Search",
1518
- value=st.session_state['enable_claude']
1519
- )
1520
- st.session_state['enable_audio'] = st.sidebar.checkbox(
1521
- "Generate Audio",
1522
- value=st.session_state['enable_audio']
1523
- )
1524
- st.session_state['enable_download'] = st.sidebar.checkbox(
1525
- "Create Download Links",
1526
- value=st.session_state['enable_download']
1527
- )
1528
- st.session_state['operation_controls'] = True
1529
 
1530
- result = ""
1531
-
1532
- # 1. Claude API (if enabled)
1533
- if st.session_state['enable_claude']:
1534
- with PerformanceTimer("claude_api") as claude_timer:
1535
- try:
1536
- client = anthropic.Anthropic(api_key=anthropic_key)
1537
- response = client.messages.create(
1538
- model="claude-3-sonnet-20240229",
1539
- max_tokens=1000,
1540
- messages=[{"role": "user", "content": q}]
1541
- )
1542
- st.write("Claude's reply ๐Ÿง :")
1543
- st.markdown(response.content[0].text)
1544
- result = response.content[0].text
1545
- timings['claude_api'] = time.time() - claude_timer.start_time
1546
- except Exception as e:
1547
- st.error(f"Error with Claude API: {str(e)}")
1548
- result = "Error occurred during Claude API call"
1549
- timings['claude_api'] = 0
1550
-
1551
- # 2. Async save and audio generation
1552
- async def process_results():
1553
- with PerformanceTimer("results_processing") as proc_timer:
1554
- md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(
1555
- q, result
1556
- )
1557
- timings['markdown_save'] = md_time
1558
- timings['audio_generation'] = audio_time
1559
 
1560
- if audio_file and st.session_state['enable_audio']:
1561
- st.subheader("๐Ÿ“ Main Response Audio")
1562
- st.audio(audio_file)
1563
-
1564
- if st.session_state['enable_download']:
1565
- st.markdown(
1566
- create_download_link_with_cache(
1567
- audio_file,
1568
- st.session_state['audio_format']
1569
- ),
1570
- unsafe_allow_html=True
1571
- )
1572
-
1573
- # Run async operations
1574
- asyncio.run(process_results())
1575
-
1576
- # 3. Arxiv RAG with performance tracking
1577
- if st.session_state['enable_claude']:
1578
- with PerformanceTimer("arxiv_rag") as rag_timer:
1579
- try:
1580
- st.write('Running Arxiv RAG with Claude inputs.')
1581
- client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
1582
- refs = client.predict(
1583
- q,
1584
- 10,
1585
- "Semantic Search",
1586
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
1587
- api_name="/update_with_rag_md"
1588
- )[0]
1589
- timings['arxiv_rag'] = time.time() - rag_timer.start_time
1590
-
1591
- # Process papers asynchronously
1592
- papers = parse_arxiv_refs(refs)
1593
- if papers:
1594
- with PerformanceTimer("paper_processing") as paper_timer:
1595
- async def process_papers():
1596
- # Create minimal links page
1597
- paper_links = create_paper_links_md(papers)
1598
- links_file = create_file(q, paper_links, "md")
1599
- st.markdown(paper_links)
1600
-
1601
- # Generate audio and display papers
1602
- await create_paper_audio_files(papers, q)
1603
- display_papers(papers, get_marquee_settings())
1604
- display_papers_in_sidebar(papers)
1605
-
1606
- asyncio.run(process_papers())
1607
- timings['paper_processing'] = time.time() - paper_timer.start_time
1608
- else:
1609
- st.warning("No papers found in the response.")
1610
- except Exception as e:
1611
- st.error(f"Error during Arxiv RAG: {str(e)}")
1612
- timings['arxiv_rag'] = 0
1613
-
1614
- return result, timings
1615
-
1616
- def process_voice_input(text: str):
1617
- """Process voice input with enhanced error handling and feedback."""
1618
- if not text:
1619
- st.warning("Please provide some input text.")
1620
- return
1621
 
1622
- with PerformanceTimer("voice_processing"):
 
 
 
 
 
1623
  try:
1624
- st.subheader("๐Ÿ” Search Results")
1625
- result, timings = perform_ai_lookup(
1626
- text,
1627
- vocal_summary=True,
1628
- extended_refs=False,
1629
- titles_summary=True,
1630
- full_audio=True
1631
  )
1632
-
1633
- # Save results
1634
- md_file, audio_file = save_qa_with_audio(text, result)
1635
-
1636
- # Display results
1637
- st.subheader("๐Ÿ“ Generated Files")
1638
- col1, col2 = st.columns(2)
1639
- with col1:
1640
- st.write(f"๐Ÿ“„ Markdown: {os.path.basename(md_file)}")
1641
- st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True)
1642
-
1643
- with col2:
1644
- if audio_file:
1645
- st.write(f"๐ŸŽต Audio: {os.path.basename(audio_file)}")
1646
- play_and_download_audio(
1647
- audio_file,
1648
- st.session_state['audio_format']
1649
- )
1650
 
1651
  except Exception as e:
1652
- st.error(f"Error processing voice input: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1653
 
1654
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1655
- # 7. SIDEBAR AND FILE HISTORY
1656
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1657
 
1658
- def display_file_history_in_sidebar():
1659
- """Display file history with enhanced organization and filtering."""
1660
- with PerformanceTimer("file_history"):
1661
- st.sidebar.markdown("---")
1662
- st.sidebar.markdown("### ๐Ÿ“‚ File History")
1663
-
1664
- # Gather all files
1665
- md_files = glob.glob("*.md")
1666
- mp3_files = glob.glob("*.mp3")
1667
- wav_files = glob.glob("*.wav")
1668
- all_files = md_files + mp3_files + wav_files
1669
-
1670
- if not all_files:
1671
- st.sidebar.write("No files found.")
1672
- return
1673
-
1674
- # Add file management controls
1675
- col1, col2 = st.sidebar.columns(2)
1676
- with col1:
1677
- if st.button("๐Ÿ—‘ Delete All"):
1678
- try:
1679
- for f in all_files:
1680
- os.remove(f)
1681
- st.session_state.should_rerun = True
1682
- st.success("All files deleted successfully.")
1683
- except Exception as e:
1684
- st.error(f"Error deleting files: {str(e)}")
1685
-
1686
- with col2:
1687
- if st.button("โฌ‡๏ธ Zip All"):
1688
- zip_name = create_zip_of_files(
1689
- md_files,
1690
- mp3_files,
1691
- wav_files,
1692
- st.session_state.get('last_query', '')
1693
- )
1694
- if zip_name:
1695
- st.sidebar.markdown(
1696
- get_download_link(zip_name, "zip"),
1697
- unsafe_allow_html=True
1698
- )
1699
-
1700
- # Add file filtering options
1701
- st.sidebar.markdown("### ๐Ÿ” Filter Files")
1702
- file_search = st.sidebar.text_input("Search files:", "")
1703
- file_type_filter = st.sidebar.multiselect(
1704
- "File types:",
1705
- ["Markdown", "Audio"],
1706
- default=["Markdown", "Audio"]
1707
- )
1708
-
1709
- # Sort files by modification time
1710
- all_files.sort(key=os.path.getmtime, reverse=True)
1711
-
1712
- # Filter files based on search and type
1713
- filtered_files = []
1714
  for f in all_files:
1715
- if file_search.lower() in f.lower():
1716
- ext = os.path.splitext(f)[1].lower()
1717
- if (("Markdown" in file_type_filter and ext == ".md") or
1718
- ("Audio" in file_type_filter and ext in [".mp3", ".wav"])):
1719
- filtered_files.append(f)
1720
-
1721
- # Display filtered files
1722
- for f in filtered_files:
1723
- fname = os.path.basename(f)
1724
- ext = os.path.splitext(fname)[1].lower().strip('.')
1725
- emoji = FILE_EMOJIS.get(ext, '๐Ÿ“ฆ')
1726
-
1727
- # Get file metadata
1728
- mod_time = datetime.fromtimestamp(os.path.getmtime(f))
1729
- time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S")
1730
- file_size = os.path.getsize(f) / 1024 # Size in KB
1731
-
1732
- with st.sidebar.expander(f"{emoji} {fname}"):
1733
- st.write(f"**Modified:** {time_str}")
1734
- st.write(f"**Size:** {file_size:.1f} KB")
1735
-
1736
- if ext == "md":
1737
- try:
1738
- with open(f, "r", encoding="utf-8") as file_in:
1739
- snippet = file_in.read(200).replace("\n", " ")
1740
- if len(snippet) == 200:
1741
- snippet += "..."
1742
- st.write(snippet)
1743
- st.markdown(
1744
- get_download_link(f, file_type="md"),
1745
- unsafe_allow_html=True
1746
- )
1747
- except Exception as e:
1748
- st.error(f"Error reading markdown file: {str(e)}")
1749
-
1750
- elif ext in ["mp3", "wav"]:
1751
- st.audio(f)
1752
- st.markdown(
1753
- get_download_link(f, file_type=ext),
1754
- unsafe_allow_html=True
1755
- )
1756
-
1757
- else:
1758
- st.markdown(get_download_link(f), unsafe_allow_html=True)
1759
 
1760
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1761
- # 8. MAIN APPLICATION
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1762
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1763
 
1764
  def main():
1765
- """Main application entry point with enhanced UI and error handling."""
1766
- try:
1767
- # 1. Setup marquee UI in sidebar
1768
- update_marquee_settings_ui()
1769
- marquee_settings = get_marquee_settings()
1770
-
1771
- # 2. Display welcome marquee
1772
- display_marquee(
1773
- st.session_state['marquee_content'],
1774
- {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
1775
- key_suffix="welcome"
1776
- )
1777
-
1778
- # 3. Main action tabs
1779
- tab_main = st.radio(
1780
- "Action:",
1781
- ["๐ŸŽค Voice", "๐Ÿ“ธ Media", "๐Ÿ” ArXiv", "๐Ÿ“ Editor"],
1782
- horizontal=True
1783
- )
1784
-
1785
- # Custom component usage
1786
- mycomponent = components.declare_component(
1787
- "mycomponent",
1788
- path="mycomponent"
1789
- )
1790
- val = mycomponent(my_input_value="Hello")
1791
-
1792
- if val:
1793
- # Process input value
1794
- val_stripped = val.replace('\\n', ' ')
1795
- edited_input = st.text_area(
1796
- "โœ๏ธ Edit Input:",
1797
- value=val_stripped,
1798
- height=100
1799
- )
1800
 
1801
- # Model selection and options
1802
- run_option = st.selectbox("Model:", ["Arxiv"])
1803
- col1, col2 = st.columns(2)
1804
-
1805
- with col1:
1806
- #autorun = st.checkbox("โš™ AutoRun", value=True)
1807
- autorun = st.checkbox("โš™ AutoRun", value=False)
1808
- with col2:
1809
- full_audio = st.checkbox("๐Ÿ“š FullAudio", value=False)
1810
-
1811
- # Check for input changes
1812
- input_changed = (val != st.session_state.old_val)
1813
-
1814
- if autorun and input_changed:
1815
- st.session_state.old_val = val
1816
- st.session_state.last_query = edited_input
1817
- result, timings = perform_ai_lookup(
1818
- edited_input,
1819
- vocal_summary=True,
1820
- extended_refs=False,
1821
- titles_summary=True,
1822
- full_audio=full_audio
1823
- )
1824
-
1825
- # Display performance metrics
1826
- display_performance_metrics(timings)
1827
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1828
  else:
1829
- if st.button("โ–ถ Run"):
1830
- st.session_state.old_val = val
1831
- st.session_state.last_query = edited_input
1832
- result, timings = perform_ai_lookup(
1833
- edited_input,
1834
- vocal_summary=True,
1835
- extended_refs=False,
1836
- titles_summary=True,
1837
- full_audio=full_audio
1838
- )
1839
-
1840
- # Display performance metrics
1841
- display_performance_metrics(timings)
1842
-
1843
- # Tab-specific content
1844
- if tab_main == "๐Ÿ” ArXiv":
1845
- display_arxiv_tab()
1846
- elif tab_main == "๐ŸŽค Voice":
1847
- display_voice_tab()
1848
- elif tab_main == "๐Ÿ“ธ Media":
1849
- display_media_tab()
1850
- elif tab_main == "๐Ÿ“ Editor":
1851
- display_editor_tab()
1852
-
1853
- # Display file history
1854
- display_file_history_in_sidebar()
1855
-
1856
- # Apply styling
1857
- apply_custom_styling()
1858
-
1859
- # Check for rerun
1860
- if st.session_state.should_rerun:
1861
- st.session_state.should_rerun = False
1862
- st.rerun()
1863
-
1864
- except Exception as e:
1865
- st.error(f"An error occurred in the main application: {str(e)}")
1866
- st.info("Please try refreshing the page or contact support if the issue persists.")
 
 
 
1867
 
1868
  if __name__ == "__main__":
1869
- main()
 
1
  import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import plotly.graph_objects as go
4
  import streamlit.components.v1 as components
5
  from datetime import datetime
 
20
  import asyncio
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
 
 
 
23
 
24
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
25
  # 1. CORE CONFIGURATION & SETUP
 
35
  'About': "๐ŸšฒTalkingAIResearcher๐Ÿ†"
36
  }
37
  )
 
38
  load_dotenv()
39
 
40
  # Available English voices for Edge TTS
 
50
  "en-CA-LiamNeural"
51
  ]
52
 
53
# Session state defaults. Each key is seeded only when it is not already
# present, so values already stored in the session are left untouched.
_SESSION_DEFAULTS = {
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    },
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant",
    # AutoRun starts switched off.
    'autorun': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
112
+
113
+ # API Keys
114
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
115
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
116
+ xai_key = os.getenv('xai',"")
 
117
  if 'OPENAI_API_KEY' in st.secrets:
118
  openai_api_key = st.secrets['OPENAI_API_KEY']
119
  if 'ANTHROPIC_API_KEY' in st.secrets:
 
124
  HF_KEY = os.getenv('HF_KEY')
125
  API_URL = os.getenv('API_URL')
126
 
127
+ # Helper constants
128
  FILE_EMOJIS = {
129
  "md": "๐Ÿ“",
130
  "mp3": "๐ŸŽต",
131
  "wav": "๐Ÿ”Š",
132
+ "pdf": "๐Ÿ“•",
133
+ "mp4": "๐ŸŽฅ",
134
+ "csv": "๐Ÿ“ˆ",
135
+ "xlsx": "๐Ÿ“Š",
136
+ "html": "๐ŸŒ",
137
+ "py": "๐Ÿ",
138
+ "txt": "๐Ÿ“„"
139
  }
140
 
141
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
142
+ # 2. HELPER FUNCTIONS
143
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
144
 
145
def get_central_time():
    """Return the current time as a timezone-aware datetime in US/Central."""
    return datetime.now(pytz.timezone('US/Central'))
149
 
150
def format_timestamp_prefix():
    """Return the current Central time formatted as MM_dd_yy_hh_mm_AM/PM."""
    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def initialize_marquee_settings():
 
156
  if 'marquee_settings' not in st.session_state:
157
  st.session_state['marquee_settings'] = {
158
  "background": "#1E1E1E",
 
164
  }
165
 
166
def get_marquee_settings():
    """Return the marquee settings dict from session state, seeding it first if absent."""
    initialize_marquee_settings()
    current_settings = st.session_state['marquee_settings']
    return current_settings
169
 
170
  def update_marquee_settings_ui():
171
+ """Add color pickers & sliders for marquee config in sidebar."""
172
  st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
 
 
173
  cols = st.sidebar.columns(2)
 
 
174
  with cols[0]:
175
+ bg_color = st.color_picker("๐ŸŽจ Background",
176
+ st.session_state['marquee_settings']["background"],
177
+ key="bg_color_picker")
178
+ text_color = st.color_picker("โœ๏ธ Text",
179
+ st.session_state['marquee_settings']["color"],
180
+ key="text_color_picker")
 
 
 
 
 
 
 
 
 
181
  with cols[1]:
182
+ font_size = st.slider("๐Ÿ“ Size", 10, 24, 14, key="font_size_slider")
183
+ duration = st.slider("โฑ๏ธ Speed", 1, 20, 20, key="duration_slider")
184
+
 
 
 
 
 
 
 
 
 
 
 
 
185
  st.session_state['marquee_settings'].update({
186
  "background": bg_color,
187
  "color": text_color,
 
189
  "animationDuration": f"{duration}s"
190
  })
191
 
192
def display_marquee(text, settings, key_suffix=""):
    """Render `text` as a marquee styled by `settings`.

    Text longer than 280 characters is cut to 280 and suffixed with "...".
    `key_suffix` is appended to "marquee_" to form the widget key.
    """
    if len(text) > 280:
        shown = text[:280] + "..."
    else:
        shown = text
    widget_key = f"marquee_{key_suffix}"
    streamlit_marquee(content=shown, **settings, key=widget_key)
    # Empty write placed after the marquee.
    st.write("")
201
 
202
def get_high_info_terms(text: str, top_n=10) -> list:
    """Return the `top_n` most frequent words and bigrams in `text`.

    The text is lower-cased and tokenised into words (hyphenated words stay
    whole). Stop words are dropped, and so is every bigram that contains a
    stop word, so terms such as "of the" no longer appear in the result.
    Ties keep first-seen order, with single words ranked before bigrams.

    Args:
        text: Free text to analyse.
        top_n: Maximum number of terms to return.

    Returns:
        A list of terms, most frequent first; empty when `text` has no
        usable words.
    """
    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with'}
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    unigrams = [word for word in words if word not in stop_words]
    bigrams = [
        f"{first} {second}"
        for first, second in zip(words, words[1:])
        if first not in stop_words and second not in stop_words
    ]
    counts = Counter(unigrams + bigrams)
    return [term for term, _ in counts.most_common(top_n)]
211
 
212
def clean_text_for_filename(text: str) -> str:
    """Turn free text into an underscore-joined, filename-friendly string.

    Lower-cases the text, strips everything except word characters,
    whitespace and hyphens, keeps only words longer than three characters
    that are not in the ignore list, and caps the result at 200 characters.
    """
    ignored = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'])
    stripped = re.sub(r'[^\w\s-]', '', text.lower())
    kept = []
    for word in stripped.split():
        if len(word) <= 3 or word in ignored:
            continue
        kept.append(word)
    return '_'.join(kept)[:200]
221
 
222
+ def generate_filename(prompt, response, file_type="md", max_length=200):
223
+ """
224
+ Generate a shortened filename by:
225
+ 1) extracting high-info terms,
226
+ 2) snippet from prompt+response,
227
+ 3) remove duplicates,
228
+ 4) truncate if needed.
229
+ """
230
  prefix = format_timestamp_prefix() + "_"
231
+ combined_text = (prompt + " " + response)[:200]
232
+ info_terms = get_high_info_terms(combined_text, top_n=5)
 
 
 
 
233
  snippet = (prompt[:40] + " " + response[:40]).strip()
234
  snippet_cleaned = clean_text_for_filename(snippet)
235
 
236
+ # remove duplicates
237
  name_parts = info_terms + [snippet_cleaned]
238
  seen = set()
239
  unique_parts = []
 
242
  seen.add(part)
243
  unique_parts.append(part)
244
 
 
245
  full_name = '_'.join(unique_parts).strip('_')
246
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
247
  if len(full_name) > leftover_chars:
 
249
 
250
  return f"{prefix}{full_name}.{file_type}"
251
 
252
def create_file(prompt, response, file_type="md"):
    """Write `prompt`, a blank line, and `response` to a new UTF-8 text file.

    The file name comes from `generate_filename` using the stripped prompt
    and response. Returns the name of the file that was written.
    """
    target = generate_filename(prompt.strip(), response.strip(), file_type)
    body = prompt + "\n\n" + response
    with open(target, 'w', encoding='utf-8') as handle:
        handle.write(body)
    return target
258
+
259
def get_download_link(file, file_type="zip"):
    """Return an HTML anchor that downloads `file` through a base64 data URI.

    Args:
        file: Path of the file to embed.
        file_type: One of "zip", "mp3", "wav", "md"; any other value yields a
            generic ``application/octet-stream`` link with a plain label.

    Returns:
        The ``<a ...>`` markup as a string. The file name is HTML-escaped
        because callers render this markup with ``unsafe_allow_html=True``
        and file names can come from user uploads.
    """
    import html  # local import: only needed here, for escaping the file name

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    mime_and_label = {
        "zip": ("application/zip", "๐Ÿ“‚ Download"),
        "mp3": ("audio/mpeg", "๐ŸŽต Download"),
        "wav": ("audio/wav", "๐Ÿ”Š Download"),
        "md": ("text/markdown", "๐Ÿ“ Download"),
    }
    mime, label = mime_and_label.get(
        file_type, ("application/octet-stream", "Download")
    )
    name = html.escape(os.path.basename(file), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{label} {name}</a>'
275
+
276
def clean_for_speech(text: str) -> str:
    """Prepare text for TTS.

    Newlines and "</s>" markers become spaces, "#" characters are removed,
    parenthesised http(s) URLs are dropped, and whitespace runs collapse to a
    single space with the ends trimmed.
    """
    flattened = text.replace("\n", " ").replace("</s>", " ").replace("#", "")
    without_links = re.sub(r"\(https?:\/\/[^\)]+\)", "", flattened)
    return re.sub(r"\s+", " ", without_links).strip()
284
+
285
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synthesise `text` with edge-tts and save it to a generated file name.

    The text is first passed through `clean_for_speech`; if nothing remains,
    no audio is produced and None is returned. `rate` and `pitch` are integers
    rendered with an explicit sign as "<n>%" and "<n>Hz". `file_format` is
    used only as the extension of the generated file name.
    NOTE(review): confirm the saved bytes actually match a "wav" extension.

    Returns:
        The output file name, or None when the cleaned text is empty.
    """
    spoken = clean_for_speech(text)
    if not spoken.strip():
        return None
    communicator = edge_tts.Communicate(
        spoken,
        voice,
        rate=f"{rate:+d}%",
        pitch=f"{pitch:+d}Hz",
    )
    destination = generate_filename(spoken, spoken, file_type=file_format)
    await communicator.save(destination)
    return destination
296
+
297
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper: run `edge_tts_generate_audio` to completion and return its result."""
    job = edge_tts_generate_audio(text, voice, rate, pitch, file_format)
    return asyncio.run(job)
300
+
301
def play_and_download_audio(file_path, file_type="mp3"):
    """Show an audio player and a download link for `file_path`.

    Does nothing when `file_path` is empty/None or the file does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(
        get_download_link(file_path, file_type=file_type),
        unsafe_allow_html=True,
    )
307
+
308
def save_qa_with_audio(question, answer, voice=None):
    """Save a question/answer pair as markdown and as spoken audio.

    Args:
        question: The prompt text.
        answer: The response text.
        voice: TTS voice name; falls back to the session's 'tts_voice' when
            empty or None.

    Returns:
        A ``(md_file, audio_file)`` tuple: the markdown file name from
        `create_file` and whatever `speak_with_edge_tts` returns for the
        audio.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=voice,
        file_format=st.session_state['audio_format'],
    )
    return md_file, audio_file
322
 
323
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
324
+ # 3. PAPER PARSING & DISPLAY
325
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
326
 
327
def parse_arxiv_refs(ref_text: str, max_papers: int = 20):
    """Parse a markdown reference listing into a list of paper dicts.

    A line containing exactly two '|' characters starts a new paper and is
    read as "date | title | rest". The first following line becomes the
    authors; every later line is appended to the summary until the next
    header line.

    Args:
        ref_text: Multi-line markdown text; empty or None yields [].
        max_papers: Maximum number of papers to return (default 20).

    Returns:
        A list of dicts with keys 'date', 'title', 'url', 'authors',
        'summary', 'full_audio' (None) and 'download_base64' ('').
        'url' is the first arxiv.org link on the header line, without any
        trailing markdown punctuation, or a "paper_<n>" placeholder when the
        header carries no link.
    """
    if not ref_text:
        return []

    results = []
    current_paper = None

    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            # Header line: flush the paper collected so far, then start anew.
            if current_paper:
                results.append(current_paper)
                current_paper = None
                if len(results) >= max_papers:
                    break

            # Exactly two separators always split into three parts.
            date, title, _ = line.strip('* ').split('|')
            # Stop at whitespace or markdown delimiters so ")" / "**" that
            # close a link or bold span are not captured as part of the URL.
            url_match = re.search(r'https://arxiv\.org/[^\s)\]>*"\']+', line)
            url = url_match.group(0) if url_match else f"paper_{len(results)}"

            current_paper = {
                'date': date.strip(),
                'title': title.strip(),
                'url': url,
                'authors': '',
                'summary': '',
                'full_audio': None,
                'download_base64': '',
            }
        elif current_paper:
            # The first body line fills authors; later lines extend the summary.
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            elif current_paper['summary']:
                current_paper['summary'] += ' ' + line.strip()
            else:
                current_paper['summary'] = line.strip()

    if current_paper:
        results.append(current_paper)

    return results[:max_papers]
381
+
382
def create_paper_links_md(papers):
    """Build markdown listing each paper's title with a link to its URL.

    The output starts with a "# Paper Links" heading followed by one
    numbered entry per paper, counting from 1.
    """
    entries = [
        f"{number}. **{paper['title']}** โ€” [Arxiv]({paper['url']})"
        for number, paper in enumerate(papers, start=1)
    ]
    return "\n".join(["# Paper Links\n"] + entries)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
def create_paper_audio_files(papers, input_question):
    """Generate a spoken summary for each paper, updating the dicts in place.

    For every paper the text "<title> by <authors>. <summary>" is cleaned and
    sent to TTS using the session's voice and audio format. The resulting
    file path is stored in paper['full_audio'] and an HTML download link with
    the audio embedded as base64 in paper['download_base64']. If anything
    fails for a paper, a warning is shown and both fields are reset.

    `input_question` is accepted but not used by this function.
    """
    for paper in papers:
        try:
            spoken = clean_for_speech(
                f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            )
            fmt = st.session_state['audio_format']
            audio_path = speak_with_edge_tts(
                spoken,
                voice=st.session_state['tts_voice'],
                file_format=fmt,
            )
            paper['full_audio'] = audio_path

            if not audio_path:
                continue

            with open(audio_path, "rb") as audio_in:
                encoded = base64.b64encode(audio_in.read()).decode()
            shown_name = os.path.basename(audio_path)
            subtype = "mpeg" if fmt == "mp3" else "wav"
            paper['download_base64'] = (
                f'<a href="data:audio/{subtype};base64,{encoded}" '
                f'download="{shown_name}">๐ŸŽต Download {shown_name}</a>'
            )
        except Exception as e:
            st.warning(f"Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
420
+
421
def display_papers(papers, marquee_settings):
    """Render each paper in the main area: a marquee line, then an expanded panel.

    The panel shows date, title, link, authors and summary; when the paper
    has audio it also shows a player and, if present, the download link.
    """
    st.write("## Research Papers")
    for position, paper in enumerate(papers, start=1):
        banner = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors'][:120]} | ๐Ÿ“ {paper['summary'][:200]}"
        display_marquee(banner, marquee_settings, key_suffix=f"paper_{position}")

        with st.expander(f"{position}. ๐Ÿ“„ {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** โ€” [Arxiv Link]({paper['url']})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            if not paper.get('full_audio'):
                continue
            st.write("๐Ÿ“š Paper Audio")
            st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
437
+
438
def display_papers_in_sidebar(papers):
    """List the papers in the sidebar, one collapsed expander per paper.

    Each expander shows the link, the audio player and download link when
    available, the authors, and the first 300 characters of the summary
    followed by "...".
    """
    st.sidebar.title("๐ŸŽถ Papers & Audio")
    for position, paper in enumerate(papers, start=1):
        panel = st.sidebar.expander(f"{position}. {paper['title']}")
        with panel:
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            audio_path = paper['full_audio']
            if audio_path:
                st.audio(audio_path)
                link_html = paper['download_base64']
                if link_html:
                    st.markdown(link_html, unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            summary = paper['summary']
            if summary:
                st.markdown(f"**Summary:** {summary[:300]}...")
451
 
452
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
453
+ # 4. ZIP & DELETE-ALL UTILS
454
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
455
 
456
def create_zip_of_all_files():
    """Zip every recognised file in the working directory.

    The archive name is the timestamp prefix plus high-information terms
    from the files' content, cut to 20 characters before ".zip" is added.
    Markdown files contribute their text to the naming; other files
    contribute only their base name, and the last query is added when set.

    Returns:
        The zip file name, or None when no recognised files exist.
    """
    file_patterns = [
        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt"
    ]
    # Sorted so the term ranking, and therefore the name, is deterministic.
    all_files = sorted({path for pat in file_patterns for path in glob.glob(pat)})

    if not all_files:
        return None

    # Collect text that drives the archive name.
    all_content = []
    for path in all_files:
        if path.endswith(".md"):
            # errors="replace": a non-UTF-8 .md must not abort the whole
            # zip, since the text is only used for naming.
            with open(path, "r", encoding="utf-8", errors="replace") as fin:
                all_content.append(fin.read())
        else:
            all_content.append(os.path.basename(path))

    if st.session_state['last_query']:
        all_content.append(st.session_state['last_query'])

    info_terms = get_high_info_terms(" ".join(all_content), top_n=10)

    timestamp = format_timestamp_prefix()
    name_text = '-'.join(info_terms[:5])
    short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as z:
        for path in all_files:
            z.write(path)
    return short_zip_name
498
+
499
def delete_all_files():
    """Remove every recognised file type from the working directory.

    The running script itself is never deleted, even though "*.py" is among
    the patterns; removing it would break the app that offers this action.
    Matches that are not regular files (e.g. a directory named "x.md") are
    skipped instead of raising.
    """
    file_patterns = [
        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt"
    ]
    script_path = globals().get("__file__")
    protected = os.path.abspath(script_path) if script_path else None

    for pat in file_patterns:
        for path in glob.glob(pat):
            if not os.path.isfile(path):
                continue
            if protected is not None and os.path.abspath(path) == protected:
                continue
            os.remove(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
 
509
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
510
+ # 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
511
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
512
+
513
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False):
    """Answer `q` with Claude, then with the ArXiv RAG Space, and show both.

    Steps:
      1. Send `q` to Claude, display the reply, save it as markdown + audio.
      2. Query the Gradio ArXiv RAG Space for references and an LLM answer,
         then save the combined result as markdown + audio.
      3. Parse the references into papers, write a links page, generate
         per-paper audio and display the papers in the main area and sidebar.

    Args:
        q: The user's query.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility; not read by this function.

    Returns:
        The combined markdown string "### <icon> q / RAG answer / references".
    """
    start = time.time()

    # --- 1) Claude API
    claude_client = anthropic.Anthropic(api_key=anthropic_key)
    response = claude_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": q}
        ])
    claude_text = response.content[0].text
    st.write("Claude's reply ๐Ÿง :")
    st.markdown(claude_text)

    # save_qa_with_audio already writes the markdown file through
    # create_file, so no separate create_file call is needed here.
    _, audio_file = save_qa_with_audio(q, claude_text)
    st.subheader("๐Ÿ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 2) Arxiv RAG
    st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = rag_client.predict(
        q,
        20,
        "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]
    r2 = rag_client.predict(
        q,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )

    result = f"### ๐Ÿ”Ž {q}\n\n{r2}\n\n{refs}"
    _, audio_file = save_qa_with_audio(q, result)
    st.subheader("๐Ÿ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 3) Parse + handle papers
    papers = parse_arxiv_refs(refs)
    if papers:
        # Minimal links page first, then audio for each paper.
        paper_links = create_paper_links_md(papers)
        create_file(q, paper_links, "md")
        st.markdown(paper_links)

        create_paper_audio_files(papers, input_question=q)
        display_papers(papers, get_marquee_settings())
        display_papers_in_sidebar(papers)
    else:
        st.warning("No papers found in the response.")

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
577
+
578
def process_voice_input(text):
    """Run the AI lookup for a submitted message and show the generated files.

    Returns immediately when `text` is empty. Otherwise runs
    `perform_ai_lookup`, saves the query and result as markdown + audio,
    lists both file names and offers the audio for playback and download.
    """
    if not text:
        return

    st.subheader("๐Ÿ” Search Results")
    lookup_options = dict(
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True,
    )
    answer = perform_ai_lookup(text, **lookup_options)

    saved_md, saved_audio = save_qa_with_audio(text, answer)
    st.subheader("๐Ÿ“ Generated Files")
    st.write(f"Markdown: {saved_md}")
    st.write(f"Audio: {saved_audio}")
    play_and_download_audio(saved_audio, st.session_state['audio_format'])
595
+
596
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
597
+ # 6. FILE HISTORY SIDEBAR
598
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
599
+
600
def display_file_history_in_sidebar():
    """Show every recognised file in the sidebar, newest modification first.

    Each file gets an expander with its modification time and a download
    link. Markdown files also show their first 200 characters and audio
    files an inline player. The download link is always built with the
    file's own extension as `file_type`, so files that are not markdown or
    audio are no longer labelled as zip archives.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### ๐Ÿ“‚ File History")

    patterns = [
        "*.md", "*.pdf", "*.mp4", "*.mp3", "*.wav",
        "*.csv", "*.xlsx", "*.html", "*.py", "*.txt"
    ]
    all_files = {path for p in patterns for path in glob.glob(p)}

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # Newest first.
    for f in sorted(all_files, key=os.path.getmtime, reverse=True):
        fname = os.path.basename(f)
        ext = os.path.splitext(fname)[1].lower().strip('.')
        emoji = FILE_EMOJIS.get(ext, '๐Ÿ“ฆ')
        time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")

        with st.sidebar.expander(f"{emoji} {fname}"):
            st.write(f"**Modified:** {time_str}")
            if ext == "md":
                with open(f, "r", encoding="utf-8") as file_in:
                    snippet = file_in.read(200).replace("\n", " ")
                # A full 200-character read is shown with an ellipsis.
                if len(snippet) == 200:
                    snippet += "..."
                st.write(snippet)
            elif ext in ["mp3", "wav"]:
                st.audio(f)
            st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
645
+
646
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
647
+ # 7. MAIN APP
648
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
649
 
650
def main():
    """
    Main Streamlit app.

    Sidebar: global settings (TTS voice, AutoRun, audio format), file tools
    (Delete All / Zip All), marquee settings and the file history.
    Main area: a welcome marquee and five actions (Upload, Voice, Media,
    ArXiv, Editor).

    Reruns use `st.rerun()`. Uploaded files are written once per upload, and
    the ArXiv lookup runs at most once per script run even when AutoRun is
    on and Run is clicked.
    """
    # Tab labels are defined once so the radio options and the dispatch
    # below cannot drift apart.
    tab_upload = "๐Ÿ“ค Upload"
    tab_voice = "๐ŸŽค Voice"
    tab_media = "๐Ÿ“ธ Media"
    tab_arxiv = "๐Ÿ” ArXiv"
    tab_editor = "๐Ÿ“ Editor"

    # -- 1) Voice & AutoRun at top of sidebar --
    st.sidebar.title("Global Settings")
    selected_voice = st.sidebar.selectbox(
        "TTS Voice",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )
    # AutoRun defaults to off (False).
    st.session_state.autorun = st.sidebar.checkbox("AutoRun", value=st.session_state.autorun)

    # Audio format
    audio_format = st.sidebar.radio("Audio Format", ["MP3", "WAV"], index=0)
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if audio_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = audio_format.lower()
        st.rerun()

    # -- 2) File Tools: Delete All / Zip All
    st.sidebar.markdown("---")
    st.sidebar.markdown("### ๐Ÿ—ƒ File Tools")
    col_del, col_zip = st.sidebar.columns(2)
    with col_del:
        if st.button("๐Ÿ—‘ Delete All"):
            delete_all_files()
            st.sidebar.success("All recognized files removed!")
            st.rerun()
    with col_zip:
        if st.button("๐Ÿ“ฆ Zip All"):
            zip_name = create_zip_of_all_files()
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)

    # -- 3) Marquee Settings
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # -- 4) File History in sidebar
    display_file_history_in_sidebar()

    # -- 5) Display marquee (larger font for the welcome banner)
    display_marquee(st.session_state['marquee_content'],
                    {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
                    key_suffix="welcome")

    # -- 6) Main action tabs
    tab_main = st.radio(
        "Action:",
        [tab_upload, tab_voice, tab_media, tab_arxiv, tab_editor],
        horizontal=True
    )

    # 6a) Upload Tab
    if tab_main == tab_upload:
        st.header("๐Ÿ“ค Upload Files")
        uploaded = st.file_uploader("Select files to upload:",
                                    accept_multiple_files=True,
                                    type=None)
        if uploaded:
            # Remember what has been written: the uploader keeps returning
            # the same files on every rerun, and rewriting them while
            # requesting another rerun would loop forever.
            if 'processed_uploads' not in st.session_state:
                st.session_state['processed_uploads'] = set()
            processed = st.session_state['processed_uploads']
            fresh = [uf for uf in uploaded if (uf.name, uf.size) not in processed]
            for uf in fresh:
                # basename() keeps the write inside the working directory
                # even if the client-supplied name contains path components.
                safe_name = os.path.basename(uf.name)
                with open(safe_name, "wb") as outfile:
                    outfile.write(uf.getvalue())
                processed.add((uf.name, uf.size))
            if fresh:
                st.success("Uploaded!")
                st.session_state.should_rerun = True

    # 6b) Voice Tab
    elif tab_main == tab_voice:
        st.subheader("๐ŸŽค Voice Input")
        user_text = st.text_area("๐Ÿ’ฌ Message:", height=100)
        user_text = user_text.strip().replace('\n', ' ')

        if st.button("๐Ÿ“จ Send"):
            process_voice_input(user_text)

        st.subheader("๐Ÿ“œ Chat History")
        for c in st.session_state.chat_history:
            st.write("**You:**", c["user"])
            st.write("**Response:**", c["claude"])

    # 6c) Media Tab
    elif tab_main == tab_media:
        st.header("๐Ÿ“ธ Media Gallery")
        tabs = st.tabs(["๐ŸŽต Audio", "๐Ÿ–ผ Images", "๐ŸŽฅ Video"])
        with tabs[0]:
            st.subheader("๐ŸŽต Audio Files")
            audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
            if audio_files:
                for a in audio_files:
                    with st.expander(os.path.basename(a)):
                        st.audio(a)
                        ext = os.path.splitext(a)[1].replace('.', '')
                        dl_link = get_download_link(a, file_type=ext)
                        st.markdown(dl_link, unsafe_allow_html=True)
            else:
                st.write("No audio files found.")
        with tabs[1]:
            st.subheader("๐Ÿ–ผ Image Files")
            imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3, key="cols_images")
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
            else:
                st.write("No images found.")
        with tabs[2]:
            st.subheader("๐ŸŽฅ Video Files")
            vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
            if vids:
                for v in vids:
                    with st.expander(os.path.basename(v)):
                        st.video(v)
            else:
                st.write("No videos found.")

    # 6d) ArXiv Tab
    elif tab_main == tab_arxiv:
        st.subheader("๐Ÿ” Query ArXiv")
        q = st.text_input("๐Ÿ” Query:", key="arxiv_query")

        st.markdown("### ๐ŸŽ› Options")
        st.write("(AutoRun is in the sidebar.)")
        extended_refs = st.checkbox("๐Ÿ“œLongRefs", value=False, key="option_extended_refs")
        titles_summary = st.checkbox("๐Ÿ”–TitlesOnly", value=True, key="option_titles_summary")
        full_audio = st.checkbox("๐Ÿ“šFullAudio", value=False, key="option_full_audio")
        full_transcript = st.checkbox("๐ŸงพFullTranscript", value=False, key="option_full_transcript")

        # The Run button is only rendered once a query has been typed.
        run_clicked = bool(q) and st.button("๐Ÿ”Run")

        # One lookup per script run, whether it came from the button or AutoRun.
        if q and (run_clicked or st.session_state.autorun):
            st.session_state.last_query = q
            result = perform_ai_lookup(q,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                create_file(q, result, "md")

    # 6e) Editor Tab
    elif tab_main == tab_editor:
        st.write("Select or create a file to edit. (Currently minimal demo)")

    # Rerun if needed
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
825
 
826
  if __name__ == "__main__":
827
+ main()