awacke1 committed on
Commit
f0f1a57
·
verified ·
1 Parent(s): b4bdbf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +879 -804
app.py CHANGED
@@ -1,5 +1,19 @@
1
  import streamlit as st
2
- import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import plotly.graph_objects as go
4
  import streamlit.components.v1 as components
5
  from datetime import datetime
@@ -20,6 +34,9 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
  import asyncio
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
 
 
 
23
 
24
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
25
  # 1. CORE CONFIGURATION & SETUP
@@ -35,6 +52,7 @@ st.set_page_config(
35
  'About': "๐ŸšฒTalkingAIResearcher๐Ÿ†"
36
  }
37
  )
 
38
  load_dotenv()
39
 
40
  # Available English voices for Edge TTS
@@ -50,66 +68,51 @@ EDGE_TTS_VOICES = [
50
  "en-CA-LiamNeural"
51
  ]
52
 
53
- # Session state variables
54
- if 'marquee_settings' not in st.session_state:
55
- st.session_state['marquee_settings'] = {
56
  "background": "#1E1E1E",
57
  "color": "#FFFFFF",
58
  "font-size": "14px",
59
  "animationDuration": "20s",
60
  "width": "100%",
61
  "lineHeight": "35px"
62
- }
63
-
64
- if 'tts_voice' not in st.session_state:
65
- st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
66
-
67
- if 'audio_format' not in st.session_state:
68
- st.session_state['audio_format'] = 'mp3'
69
-
70
- if 'transcript_history' not in st.session_state:
71
- st.session_state['transcript_history'] = []
72
-
73
- if 'chat_history' not in st.session_state:
74
- st.session_state['chat_history'] = []
75
-
76
- if 'openai_model' not in st.session_state:
77
- st.session_state['openai_model'] = "gpt-4o-2024-05-13"
78
-
79
- if 'messages' not in st.session_state:
80
- st.session_state['messages'] = []
81
-
82
- if 'last_voice_input' not in st.session_state:
83
- st.session_state['last_voice_input'] = ""
84
-
85
- if 'editing_file' not in st.session_state:
86
- st.session_state['editing_file'] = None
87
-
88
- if 'edit_new_name' not in st.session_state:
89
- st.session_state['edit_new_name'] = ""
90
-
91
- if 'edit_new_content' not in st.session_state:
92
- st.session_state['edit_new_content'] = ""
93
-
94
- if 'viewing_prefix' not in st.session_state:
95
- st.session_state['viewing_prefix'] = None
96
-
97
- if 'should_rerun' not in st.session_state:
98
- st.session_state['should_rerun'] = False
99
-
100
- if 'old_val' not in st.session_state:
101
- st.session_state['old_val'] = None
102
-
103
- if 'last_query' not in st.session_state:
104
- st.session_state['last_query'] = ""
105
 
106
- if 'marquee_content' not in st.session_state:
107
- st.session_state['marquee_content'] = "๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant"
 
 
108
 
109
- # API Keys
110
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
111
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
112
- xai_key = os.getenv('xai',"")
 
113
  if 'OPENAI_API_KEY' in st.secrets:
114
  openai_api_key = st.secrets['OPENAI_API_KEY']
115
  if 'ANTHROPIC_API_KEY' in st.secrets:
@@ -120,109 +123,476 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
120
  HF_KEY = os.getenv('HF_KEY')
121
  API_URL = os.getenv('API_URL')
122
 
123
- # Helper constants
124
  FILE_EMOJIS = {
125
  "md": "๐Ÿ“",
126
  "mp3": "๐ŸŽต",
127
- "wav": "๐Ÿ”Š"
 
 
 
 
128
  }
129
 
130
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
131
- # 2. HELPER FUNCTIONS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
133
 
134
- def get_central_time():
135
- """Get current time in US Central timezone."""
136
- central = pytz.timezone('US/Central')
137
- return datetime.now(central)
138
-
139
- def format_timestamp_prefix():
140
- """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM."""
141
- ct = get_central_time()
142
- return ct.strftime("%m_%d_%y_%I_%M_%p")
143
-
144
- def initialize_marquee_settings():
145
- if 'marquee_settings' not in st.session_state:
146
- st.session_state['marquee_settings'] = {
147
- "background": "#1E1E1E",
148
- "color": "#FFFFFF",
149
- "font-size": "14px",
150
- "animationDuration": "20s",
151
- "width": "100%",
152
- "lineHeight": "35px"
153
- }
154
-
155
- def get_marquee_settings():
156
- initialize_marquee_settings()
157
- return st.session_state['marquee_settings']
158
-
159
- def update_marquee_settings_ui():
160
- """Add color pickers & sliders for marquee config in sidebar."""
161
- st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
162
- cols = st.sidebar.columns(2)
163
- with cols[0]:
164
- bg_color = st.color_picker("๐ŸŽจ Background",
165
- st.session_state['marquee_settings']["background"],
166
- key="bg_color_picker")
167
- text_color = st.color_picker("โœ๏ธ Text",
168
- st.session_state['marquee_settings']["color"],
169
- key="text_color_picker")
170
- with cols[1]:
171
- font_size = st.slider("๐Ÿ“ Size", 10, 24, 14, key="font_size_slider")
172
- duration = st.slider("โฑ๏ธ Speed", 1, 20, 20, key="duration_slider")
173
-
174
- st.session_state['marquee_settings'].update({
175
- "background": bg_color,
176
- "color": text_color,
177
- "font-size": f"{font_size}px",
178
- "animationDuration": f"{duration}s"
179
- })
180
-
181
- def display_marquee(text, settings, key_suffix=""):
182
- """Show marquee text with style from settings."""
183
- truncated_text = text[:280] + "..." if len(text) > 280 else text
184
- streamlit_marquee(
185
- content=truncated_text,
186
- **settings,
187
- key=f"marquee_{key_suffix}"
188
- )
189
- st.write("")
190
-
191
- def get_high_info_terms(text: str, top_n=10) -> list:
192
- """Extract top_n freq words or bigrams (excluding stopwords)."""
193
- stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
194
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
195
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
 
 
196
  combined = words + bi_grams
197
- filtered = [term for term in combined if term not in stop_words and len(term.split()) <= 2]
 
 
 
 
 
198
  counter = Counter(filtered)
199
  return [term for term, freq in counter.most_common(top_n)]
200
 
201
  def clean_text_for_filename(text: str) -> str:
202
- """Remove special chars, short words, etc. for filenames."""
 
203
  text = text.lower()
204
  text = re.sub(r'[^\w\s-]', '', text)
 
 
 
 
 
 
 
 
205
  words = text.split()
206
- # remove short or unhelpful words
207
- stop_short = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'])
208
- filtered = [w for w in words if len(w) > 3 and w not in stop_short]
209
  return '_'.join(filtered)[:200]
210
 
211
- def generate_filename(prompt, response, file_type="md", max_length=200):
212
- """
213
- Generate a shortened filename by:
214
- 1) extracting high-info terms,
215
- 2) snippet from prompt+response,
216
- 3) remove duplicates,
217
- 4) truncate if needed.
218
- """
219
  prefix = format_timestamp_prefix() + "_"
220
- combined_text = (prompt + " " + response)[:200]
221
- info_terms = get_high_info_terms(combined_text, top_n=5)
 
 
 
 
222
  snippet = (prompt[:40] + " " + response[:40]).strip()
223
  snippet_cleaned = clean_text_for_filename(snippet)
224
 
225
- # remove duplicates
226
  name_parts = info_terms + [snippet_cleaned]
227
  seen = set()
228
  unique_parts = []
@@ -231,6 +601,7 @@ def generate_filename(prompt, response, file_type="md", max_length=200):
231
  seen.add(part)
232
  unique_parts.append(part)
233
 
 
234
  full_name = '_'.join(unique_parts).strip('_')
235
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
236
  if len(full_name) > leftover_chars:
@@ -238,710 +609,414 @@ def generate_filename(prompt, response, file_type="md", max_length=200):
238
 
239
  return f"{prefix}{full_name}.{file_type}"
240
 
241
- def create_file(prompt, response, file_type="md"):
242
- """Create a text file from prompt + response with sanitized filename."""
243
- filename = generate_filename(prompt.strip(), response.strip(), file_type)
244
- with open(filename, 'w', encoding='utf-8') as f:
245
- f.write(prompt + "\n\n" + response)
246
- return filename
247
-
248
- def get_download_link(file, file_type="zip"):
249
- """
250
- Convert a file to base64 and return an HTML link for download.
251
- """
252
- with open(file, "rb") as f:
253
- b64 = base64.b64encode(f.read()).decode()
254
- if file_type == "zip":
255
- return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“‚ Download {os.path.basename(file)}</a>'
256
- elif file_type == "mp3":
257
- return f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file)}">๐ŸŽต Download {os.path.basename(file)}</a>'
258
- elif file_type == "wav":
259
- return f'<a href="data:audio/wav;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ”Š Download {os.path.basename(file)}</a>'
260
- elif file_type == "md":
261
- return f'<a href="data:text/markdown;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“ Download {os.path.basename(file)}</a>'
262
- else:
263
- return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
264
-
265
- def clean_for_speech(text: str) -> str:
266
- """Clean up text for TTS output."""
267
- text = text.replace("\n", " ")
268
- text = text.replace("</s>", " ")
269
- text = text.replace("#", "")
270
- text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
271
- text = re.sub(r"\s+", " ", text).strip()
272
- return text
273
-
274
- async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
275
- """Async TTS generation with edge-tts library."""
276
- text = clean_for_speech(text)
277
- if not text.strip():
278
- return None
279
- rate_str = f"{rate:+d}%"
280
- pitch_str = f"{pitch:+d}Hz"
281
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
282
- out_fn = generate_filename(text, text, file_type=file_format)
283
- await communicate.save(out_fn)
284
- return out_fn
285
-
286
- def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
287
- """Wrapper for the async TTS generate call."""
288
- return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
289
-
290
- def play_and_download_audio(file_path, file_type="mp3"):
291
- """Streamlit audio + a quick download link."""
292
- if file_path and os.path.exists(file_path):
293
- st.audio(file_path)
294
- dl_link = get_download_link(file_path, file_type=file_type)
295
- st.markdown(dl_link, unsafe_allow_html=True)
296
-
297
- def save_qa_with_audio(question, answer, voice=None):
298
- """Save Q&A to markdown and also generate audio."""
299
- if not voice:
300
- voice = st.session_state['tts_voice']
301
-
302
- combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
303
- md_file = create_file(question, answer, "md")
304
- audio_text = f"{question}\n\nAnswer: {answer}"
305
- audio_file = speak_with_edge_tts(
306
- audio_text,
307
- voice=voice,
308
- file_format=st.session_state['audio_format']
309
- )
310
- return md_file, audio_file
311
-
312
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
313
- # 3. PAPER PARSING & DISPLAY
314
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
315
-
316
- def parse_arxiv_refs(ref_text: str):
317
- """
318
- Given a multi-line markdown with arxiv references, parse them into
319
- a list of dicts: {date, title, url, authors, summary, ...}.
320
- """
321
- if not ref_text:
322
- return []
323
-
324
- results = []
325
- current_paper = {}
326
- lines = ref_text.split('\n')
327
-
328
- for i, line in enumerate(lines):
329
- if line.count('|') == 2:
330
- # Found a new paper line
331
- if current_paper:
332
- results.append(current_paper)
333
- if len(results) >= 20:
334
- break
335
- try:
336
- header_parts = line.strip('* ').split('|')
337
- date = header_parts[0].strip()
338
- title = header_parts[1].strip()
339
- url_match = re.search(r'(https://arxiv.org/\S+)', line)
340
- url = url_match.group(1) if url_match else f"paper_{len(results)}"
341
-
342
- current_paper = {
343
- 'date': date,
344
- 'title': title,
345
- 'url': url,
346
- 'authors': '',
347
- 'summary': '',
348
- 'full_audio': None,
349
- 'download_base64': '',
350
- }
351
- except Exception as e:
352
- st.warning(f"Error parsing paper header: {str(e)}")
353
- current_paper = {}
354
- continue
355
 
356
- elif current_paper:
357
- # If authors not set, fill it; otherwise, fill summary
358
- if not current_paper['authors']:
359
- current_paper['authors'] = line.strip('* ')
360
- else:
361
- if current_paper['summary']:
362
- current_paper['summary'] += ' ' + line.strip()
363
- else:
364
- current_paper['summary'] = line.strip()
365
-
366
- if current_paper:
367
- results.append(current_paper)
368
-
369
- return results[:20]
370
-
371
- def create_paper_links_md(papers):
372
- """Creates a minimal .md content linking to each paper's arxiv URL."""
373
- lines = ["# Paper Links\n"]
374
- for i, p in enumerate(papers, start=1):
375
- lines.append(f"{i}. **{p['title']}** โ€” [Arxiv]({p['url']})")
376
- return "\n".join(lines)
377
-
378
- def create_paper_audio_files(papers, input_question):
379
- """
380
- For each paper, generate TTS audio summary, store the path in `paper['full_audio']`,
381
- and also store a base64 link for stable downloading.
382
- """
383
- for paper in papers:
384
- try:
385
- audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
386
- audio_text = clean_for_speech(audio_text)
387
- file_format = st.session_state['audio_format']
388
- audio_file = speak_with_edge_tts(
389
- audio_text,
390
- voice=st.session_state['tts_voice'],
391
- file_format=file_format
392
- )
393
- paper['full_audio'] = audio_file
394
-
395
- if audio_file:
396
- with open(audio_file, "rb") as af:
397
- b64_data = base64.b64encode(af.read()).decode()
398
- download_filename = os.path.basename(audio_file)
399
- mime_type = "mpeg" if file_format == "mp3" else "wav"
400
- paper['download_base64'] = (
401
- f'<a href="data:audio/{mime_type};base64,{b64_data}" '
402
- f'download="{download_filename}">๐ŸŽต Download {download_filename}</a>'
403
- )
404
-
405
- except Exception as e:
406
- st.warning(f"Error processing paper {paper['title']}: {str(e)}")
407
- paper['full_audio'] = None
408
- paper['download_base64'] = ''
409
-
410
-
411
- def display_file_history_in_sidebar():
412
- """
413
- Shows a history of files grouped by query, with lazy loading of audio and content.
414
- """
415
- st.sidebar.markdown("---")
416
- st.sidebar.markdown("### ๐Ÿ“‚ File History")
417
-
418
- # Gather all files
419
- md_files = glob.glob("*.md")
420
- mp3_files = glob.glob("*.mp3")
421
- wav_files = glob.glob("*.wav")
422
- all_files = md_files + mp3_files + wav_files
423
-
424
- if not all_files:
425
- st.sidebar.write("No files found.")
426
- return
427
-
428
- # Group files by their query prefix (timestamp_query)
429
- grouped_files = {}
430
- for f in all_files:
431
- fname = os.path.basename(f)
432
- prefix = '_'.join(fname.split('_')[:6]) # Get timestamp part
433
- if prefix not in grouped_files:
434
- grouped_files[prefix] = {'md': [], 'audio': [], 'loaded': False}
435
 
436
- ext = os.path.splitext(fname)[1].lower()
437
- if ext == '.md':
438
- grouped_files[prefix]['md'].append(f)
439
- elif ext in ['.mp3', '.wav']:
440
- grouped_files[prefix]['audio'].append(f)
441
-
442
- # Sort groups by timestamp (newest first)
443
- sorted_groups = sorted(grouped_files.items(), key=lambda x: x[0], reverse=True)
444
-
445
- # ๐Ÿ—‘โฌ‡๏ธ Sidebar delete all and zip all download
446
- col1, col4 = st.sidebar.columns(2)
447
- with col1:
448
- if st.button("๐Ÿ—‘ Delete All"):
449
  for f in all_files:
450
- os.remove(f)
451
- st.session_state.should_rerun = True
452
- with col4:
453
- if st.button("โฌ‡๏ธ Zip All"):
454
- zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
455
- st.session_state.get('last_query', ''))
456
- if zip_name:
457
- st.sidebar.markdown(get_download_link(zip_name, "zip"),
458
- unsafe_allow_html=True)
459
-
460
- # Display grouped files
461
- for prefix, files in sorted_groups:
462
- # Get a preview of content from first MD file
463
- preview = ""
464
- if files['md']:
465
- with open(files['md'][0], "r", encoding="utf-8") as f:
466
- preview = f.read(200).replace("\n", " ")
467
- if len(preview) > 200:
468
- preview += "..."
469
-
470
- # Create unique key for this group
471
- group_key = f"group_{prefix}"
472
- if group_key not in st.session_state:
473
- st.session_state[group_key] = False
474
-
475
- # Display group expander
476
- with st.sidebar.expander(f"๐Ÿ“‘ Query Group: {prefix}"):
477
- st.write("**Preview:**")
478
- st.write(preview)
479
 
480
- # Load full content button
481
- if st.button("๐Ÿ“– View Full Content", key=f"btn_{prefix}"):
482
- st.session_state[group_key] = True
483
-
484
- # Only show full content and audio if button was clicked
485
- if st.session_state[group_key]:
486
- # Display markdown files
487
- for md_file in files['md']:
488
- with open(md_file, "r", encoding="utf-8") as f:
489
- content = f.read()
490
- st.markdown("**Full Content:**")
491
- st.markdown(content)
492
- st.markdown(get_download_link(md_file, file_type="md"),
493
- unsafe_allow_html=True)
494
-
495
- # Display audio files
496
- usePlaySidebar=False
497
- if usePlaySidebar:
498
- for audio_file in files['audio']:
499
- ext = os.path.splitext(audio_file)[1].replace('.', '')
500
- st.audio(audio_file)
501
- st.markdown(get_download_link(audio_file, file_type=ext),
502
- unsafe_allow_html=True)
503
-
504
- def display_papers(papers, marquee_settings):
505
- """Display paper info with both abs and PDF links."""
506
- st.write("## Research Papers")
507
- for i, paper in enumerate(papers, start=1):
508
- marquee_text = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors'][:120]}"
509
- display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
510
-
511
- with st.expander(f"{i}. ๐Ÿ“„ {paper['title']}", expanded=True):
512
- # Create PDF link by replacing 'abs' with 'pdf' in arxiv URL
513
- pdf_url = paper['url'].replace('/abs/', '/pdf/')
514
- st.markdown(f"""
515
- **{paper['date']} | {paper['title']}**
516
- ๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})
517
- """)
518
- st.markdown(f"*Authors:* {paper['authors']}")
519
- st.markdown(paper['summary'])
520
- if paper.get('full_audio'):
521
- st.write("๐Ÿ“š Paper Audio")
522
- st.audio(paper['full_audio'])
523
- if paper['download_base64']:
524
- st.markdown(paper['download_base64'], unsafe_allow_html=True)
525
-
526
- def display_papers_in_sidebar(papers):
527
- """Mirrors the paper listing in sidebar with lazy loading."""
528
- st.sidebar.title("๐ŸŽถ Papers & Audio")
529
- for i, paper in enumerate(papers, start=1):
530
- paper_key = f"paper_{paper['url']}"
531
- if paper_key not in st.session_state:
532
- st.session_state[paper_key] = False
533
-
534
- with st.sidebar.expander(f"{i}. {paper['title']}"):
535
- # Create PDF link
536
- pdf_url = paper['url'].replace('/abs/', '/pdf/')
537
- st.markdown(f"๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})")
538
 
539
- # Preview of authors and summary
540
- st.markdown(f"**Authors:** {paper['authors'][:100]}...")
541
- if paper['summary']:
542
- st.markdown(f"**Summary:** {paper['summary'][:200]}...")
543
 
544
- # Load audio button
545
- if paper['full_audio'] and st.button("๐ŸŽต Load Audio",
546
- key=f"btn_{paper_key}"):
547
- st.session_state[paper_key] = True
548
 
549
- # Show audio player and download only if requested
550
- if st.session_state[paper_key] and paper['full_audio']:
551
- st.audio(paper['full_audio'])
552
- if paper['download_base64']:
553
- st.markdown(paper['download_base64'], unsafe_allow_html=True)
554
-
555
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
556
- # 4. ZIP FUNCTION
557
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
558
-
559
- def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
560
- """
561
- Zip up all relevant files, limiting the final zip name to ~20 chars
562
- to avoid overly long base64 strings.
563
- """
564
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
565
- all_files = md_files + mp3_files + wav_files
566
- if not all_files:
567
- return None
568
-
569
- all_content = []
570
- for f in all_files:
571
- if f.endswith('.md'):
572
- with open(f, 'r', encoding='utf-8') as file:
573
- all_content.append(file.read())
574
- elif f.endswith('.mp3') or f.endswith('.wav'):
575
- basename = os.path.splitext(os.path.basename(f))[0]
576
- words = basename.replace('_', ' ')
577
- all_content.append(words)
578
-
579
- all_content.append(input_question)
580
- combined_content = " ".join(all_content)
581
- info_terms = get_high_info_terms(combined_content, top_n=10)
582
-
583
- timestamp = format_timestamp_prefix()
584
- name_text = '-'.join(term for term in info_terms[:5])
585
- short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"
586
-
587
- with zipfile.ZipFile(short_zip_name, 'w') as z:
588
- for f in all_files:
589
- z.write(f)
590
- return short_zip_name
591
 
592
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
593
- # 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
594
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
595
 
596
- def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
597
- titles_summary=True, full_audio=False):
598
- """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline."""
599
- start = time.time()
600
- ai_constitution = """
601
- You are a talented AI coder and songwriter...
602
- """
603
-
604
- # --- 1) Claude API
605
- client = anthropic.Anthropic(api_key=anthropic_key)
606
- user_input = q
607
- response = client.messages.create(
608
- model="claude-3-sonnet-20240229",
609
- max_tokens=1000,
610
- messages=[
611
- {"role": "user", "content": user_input}
612
- ])
613
- st.write("Claude's reply ๐Ÿง :")
614
- st.markdown(response.content[0].text)
615
-
616
- # Save & produce audio
617
- result = response.content[0].text
618
- create_file(q, result)
619
- md_file, audio_file = save_qa_with_audio(q, result)
620
- st.subheader("๐Ÿ“ Main Response Audio")
621
- play_and_download_audio(audio_file, st.session_state['audio_format'])
622
-
623
- # --- 2) Arxiv RAG
624
- #st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
625
- st.write('Running Arxiv RAG with Claude inputs.')
626
- client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
627
- refs = client.predict(
628
- q,
629
- 10,
630
- "Semantic Search",
631
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
632
- api_name="/update_with_rag_md"
633
- )[0]
634
-
635
- #r2 = client.predict(
636
- # q,
637
- # "mistralai/Mixtral-8x7B-Instruct-v0.1",
638
- # True,
639
- # api_name="/ask_llm"
640
- #)
641
-
642
- # --- 3) Claude API with arxiv list of papers to app.py
643
- client = anthropic.Anthropic(api_key=anthropic_key)
644
- user_input = q + '\n\n' + 'Use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.'+ '\n\n'
645
- response = client.messages.create(
646
- model="claude-3-sonnet-20240229",
647
- max_tokens=1000,
648
- messages=[
649
- {"role": "user", "content": user_input}
650
- ])
651
- r2 = response.content[0].text
652
- st.write("Claude's reply ๐Ÿง :")
653
- st.markdown(r2)
654
-
655
- #result = f"### ๐Ÿ”Ž {q}\n\n{r2}\n\n{refs}"
656
- result = f"๐Ÿ”Ž {r2}\n\n{refs}"
657
- md_file, audio_file = save_qa_with_audio(q, result)
658
- st.subheader("๐Ÿ“ Main Response Audio")
659
- play_and_download_audio(audio_file, st.session_state['audio_format'])
660
-
661
- # --- 3) Parse + handle papers
662
- papers = parse_arxiv_refs(refs)
663
- if papers:
664
- # Create minimal links page first
665
- paper_links = create_paper_links_md(papers)
666
- links_file = create_file(q, paper_links, "md")
667
- st.markdown(paper_links)
668
-
669
- # Then create audio for each paper
670
- create_paper_audio_files(papers, input_question=q)
671
- display_papers(papers, get_marquee_settings())
672
- display_papers_in_sidebar(papers)
673
- else:
674
- st.warning("No papers found in the response.")
675
-
676
- elapsed = time.time() - start
677
- st.write(f"**Total Elapsed:** {elapsed:.2f} s")
678
- return result
679
-
680
- def process_voice_input(text):
681
- """When user sends voice query, we run the AI lookup + Q&A with audio."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
  if not text:
 
683
  return
684
- st.subheader("๐Ÿ” Search Results")
685
- result = perform_ai_lookup(
686
- text,
687
- vocal_summary=True,
688
- extended_refs=False,
689
- titles_summary=True,
690
- full_audio=True
691
- )
692
- md_file, audio_file = save_qa_with_audio(text, result)
693
- st.subheader("๐Ÿ“ Generated Files")
694
- st.write(f"Markdown: {md_file}")
695
- st.write(f"Audio: {audio_file}")
696
- play_and_download_audio(audio_file, st.session_state['audio_format'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
 
698
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
699
- # 6. FILE HISTORY SIDEBAR
700
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
701
 
702
  def display_file_history_in_sidebar():
703
- """
704
- Shows a history of each local .md, .mp3, .wav file in descending
705
- order of modification time, with quick icons and optional download links.
706
- """
707
- st.sidebar.markdown("---")
708
- st.sidebar.markdown("### ๐Ÿ“‚ File History")
709
-
710
- # Gather all files
711
- md_files = glob.glob("*.md")
712
- mp3_files = glob.glob("*.mp3")
713
- wav_files = glob.glob("*.wav")
714
- all_files = md_files + mp3_files + wav_files
715
-
716
- if not all_files:
717
- st.sidebar.write("No files found.")
718
- return
 
 
 
 
 
 
 
 
 
 
719
 
720
- # ๐Ÿ—‘โฌ‡๏ธ Sidebar delete all and zip all download
721
- col1, col4 = st.sidebar.columns(2)
722
- with col1:
723
- if st.button("๐Ÿ—‘ Delete All"):
724
- for f in all_md:
725
- os.remove(f)
726
- for f in all_mp3:
727
- os.remove(f)
728
- for f in all_wav:
729
- os.remove(f)
730
- st.session_state.should_rerun = True
731
- with col4:
732
- if st.button("โฌ‡๏ธ Zip All"):
733
- zip_name = create_zip_of_files(md_files, mp3_files, wav_files, st.session_state.get('last_query', ''))
734
- if zip_name:
735
- st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
736
-
737
- # Sort newest first
738
- all_files = sorted(all_files, key=os.path.getmtime, reverse=True)
739
-
740
- for f in all_files:
741
- fname = os.path.basename(f)
742
- ext = os.path.splitext(fname)[1].lower().strip('.')
743
- emoji = FILE_EMOJIS.get(ext, '๐Ÿ“ฆ')
744
- time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
745
-
746
- with st.sidebar.expander(f"{emoji} {fname}"):
747
- st.write(f"**Modified:** {time_str}")
748
- if ext == "md":
749
- with open(f, "r", encoding="utf-8") as file_in:
750
- snippet = file_in.read(200).replace("\n", " ")
751
- if len(snippet) == 200:
752
- snippet += "..."
753
- st.write(snippet)
754
- st.markdown(get_download_link(f, file_type="md"), unsafe_allow_html=True)
755
- elif ext in ["mp3","wav"]:
756
- st.audio(f)
757
- st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
758
- else:
759
- st.markdown(get_download_link(f), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
762
- # 7. MAIN APP
763
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
764
 
765
  def main():
766
- # 1) Setup marquee UI in the sidebar
767
- update_marquee_settings_ui()
768
- marquee_settings = get_marquee_settings()
769
-
770
- # 2) Display the marquee welcome
771
- display_marquee(st.session_state['marquee_content'],
772
- {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
773
- key_suffix="welcome")
774
-
775
- # 3) Main action tabs
776
- tab_main = st.radio("Action:", ["๐ŸŽค Voice", "๐Ÿ“ธ Media", "๐Ÿ” ArXiv", "๐Ÿ“ Editor"],
777
- horizontal=True)
778
-
779
- # Example custom component usage
780
- mycomponent = components.declare_component("mycomponent", path="mycomponent")
781
- val = mycomponent(my_input_value="Hello")
782
-
783
- if val:
784
- val_stripped = val.replace('\\n', ' ')
785
- edited_input = st.text_area("โœ๏ธ Edit Input:", value=val_stripped, height=100)
786
- run_option = st.selectbox("Model:", ["Arxiv"])
787
- col1, col2 = st.columns(2)
788
- with col1:
789
- autorun = st.checkbox("โš™ AutoRun", value=True)
790
- with col2:
791
- full_audio = st.checkbox("๐Ÿ“šFullAudio", value=False)
792
-
793
- input_changed = (val != st.session_state.old_val)
794
-
795
- if autorun and input_changed:
796
- st.session_state.old_val = val
797
- st.session_state.last_query = edited_input
798
- perform_ai_lookup(edited_input,
799
- vocal_summary=True,
800
- extended_refs=False,
801
- titles_summary=True,
802
- full_audio=full_audio)
803
- else:
804
- if st.button("โ–ถ Run"):
805
- st.session_state.old_val = val
806
- st.session_state.last_query = edited_input
807
- perform_ai_lookup(edited_input,
808
- vocal_summary=True,
809
- extended_refs=False,
810
- titles_summary=True,
811
- full_audio=full_audio)
812
-
813
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
814
- # TAB: ArXiv
815
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
816
- if tab_main == "๐Ÿ” ArXiv":
817
- st.subheader("๐Ÿ” Query ArXiv")
818
- q = st.text_input("๐Ÿ” Query:", key="arxiv_query")
819
-
820
- st.markdown("### ๐ŸŽ› Options")
821
- vocal_summary = st.checkbox("๐ŸŽ™ShortAudio", value=True, key="option_vocal_summary")
822
- extended_refs = st.checkbox("๐Ÿ“œLongRefs", value=False, key="option_extended_refs")
823
- titles_summary = st.checkbox("๐Ÿ”–TitlesOnly", value=True, key="option_titles_summary")
824
- full_audio = st.checkbox("๐Ÿ“šFullAudio", value=False, key="option_full_audio")
825
- full_transcript = st.checkbox("๐ŸงพFullTranscript", value=False, key="option_full_transcript")
826
-
827
- if q and st.button("๐Ÿ”Run"):
828
- st.session_state.last_query = q
829
- result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
830
- titles_summary=titles_summary, full_audio=full_audio)
831
- if full_transcript:
832
- create_file(q, result, "md")
833
-
834
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
835
- # TAB: Voice
836
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
837
- elif tab_main == "๐ŸŽค Voice":
838
- st.subheader("๐ŸŽค Voice Input")
839
-
840
- st.markdown("### ๐ŸŽค Voice Settings")
841
- selected_voice = st.selectbox(
842
- "Select TTS Voice:",
843
- options=EDGE_TTS_VOICES,
844
- index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
845
- )
846
-
847
- st.markdown("### ๐Ÿ”Š Audio Format")
848
- selected_format = st.radio(
849
- "Choose Audio Format:",
850
- options=["MP3", "WAV"],
851
- index=0
852
  )
853
 
854
- # Update session state if voice/format changes
855
- if selected_voice != st.session_state['tts_voice']:
856
- st.session_state['tts_voice'] = selected_voice
857
- st.rerun()
858
- if selected_format.lower() != st.session_state['audio_format']:
859
- st.session_state['audio_format'] = selected_format.lower()
860
- st.rerun()
861
-
862
- # Input text
863
- user_text = st.text_area("๐Ÿ’ฌ Message:", height=100)
864
- user_text = user_text.strip().replace('\n', ' ')
865
 
866
- if st.button("๐Ÿ“จ Send"):
867
- process_voice_input(user_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
868
 
869
- st.subheader("๐Ÿ“œ Chat History")
870
- for c in st.session_state.chat_history:
871
- st.write("**You:**", c["user"])
872
- st.write("**Response:**", c["claude"])
 
 
 
 
873
 
874
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
875
- # TAB: Media
876
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
877
- elif tab_main == "๐Ÿ“ธ Media":
878
- st.header("๐Ÿ“ธ Media Gallery")
879
 
880
- # By default, show audio first
881
- tabs = st.tabs(["๐ŸŽต Audio", "๐Ÿ–ผ Images", "๐ŸŽฅ Video"])
882
-
883
- # AUDIO sub-tab
884
- with tabs[0]:
885
- st.subheader("๐ŸŽต Audio Files")
886
- audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
887
- if audio_files:
888
- for a in audio_files:
889
- with st.expander(os.path.basename(a)):
890
- st.audio(a)
891
- ext = os.path.splitext(a)[1].replace('.', '')
892
- dl_link = get_download_link(a, file_type=ext)
893
- st.markdown(dl_link, unsafe_allow_html=True)
894
- else:
895
- st.write("No audio files found.")
896
-
897
- # IMAGES sub-tab
898
- with tabs[1]:
899
- st.subheader("๐Ÿ–ผ Image Files")
900
- imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
901
- if imgs:
902
- c = st.slider("Cols", 1, 5, 3, key="cols_images")
903
- cols = st.columns(c)
904
- for i, f in enumerate(imgs):
905
- with cols[i % c]:
906
- st.image(Image.open(f), use_container_width=True)
907
- else:
908
- st.write("No images found.")
909
-
910
- # VIDEO sub-tab
911
- with tabs[2]:
912
- st.subheader("๐ŸŽฅ Video Files")
913
- vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
914
- if vids:
915
- for v in vids:
916
- with st.expander(os.path.basename(v)):
917
- st.video(v)
918
  else:
919
- st.write("No videos found.")
920
-
921
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
922
- # TAB: Editor
923
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
924
- elif tab_main == "๐Ÿ“ Editor":
925
- st.write("Select or create a file to edit. (Currently minimal demo)")
926
-
927
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
928
- # SIDEBAR: FILE HISTORY
929
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
930
- display_file_history_in_sidebar()
931
-
932
- # Some light CSS styling
933
- st.markdown("""
934
- <style>
935
- .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
936
- .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
937
- .stButton>button { margin-right: 0.5rem; }
938
- </style>
939
- """, unsafe_allow_html=True)
940
-
941
- # Rerun if needed
942
- if st.session_state.should_rerun:
943
- st.session_state.should_rerun = False
944
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
945
 
946
  if __name__ == "__main__":
947
- main()
 
1
  import streamlit as st
2
+ import anthropic
3
+ import openai
4
+ import base64
5
+ import cv2
6
+ import glob
7
+ import json
8
+ import math
9
+ import os
10
+ import pytz
11
+ import random
12
+ import re
13
+ import requests
14
+ import textract
15
+ import time
16
+ import zipfile
17
  import plotly.graph_objects as go
18
  import streamlit.components.v1 as components
19
  from datetime import datetime
 
34
  import asyncio
35
  import edge_tts
36
  from streamlit_marquee import streamlit_marquee
37
+ from concurrent.futures import ThreadPoolExecutor
38
+ from functools import partial
39
+ from typing import Dict, List, Optional, Tuple, Union
40
 
41
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
42
  # 1. CORE CONFIGURATION & SETUP
 
52
  'About': "๐ŸšฒTalkingAIResearcher๐Ÿ†"
53
  }
54
  )
55
+
56
  load_dotenv()
57
 
58
  # Available English voices for Edge TTS
 
68
  "en-CA-LiamNeural"
69
  ]
70
 
71
# Session state initialization with default values.
# Every key listed here is copied into st.session_state by the loop below,
# but only when the key is not already present there.
DEFAULT_SESSION_STATE = {
    # Styling options handed to the marquee display helper
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    },
    'tts_voice': EDGE_TTS_VOICES[0],  # first entry of the voice list is the default
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant",
    # Feature toggles read by perform_ai_lookup and the audio/save helpers
    'enable_audio': False,
    'enable_download': False,
    'enable_claude': True,
    # audio_cache: cache key -> generated audio filename (async_edge_tts_generate)
    'audio_cache': {},
    'paper_cache': {},
    # download_link_cache: cache key -> HTML anchor (create_download_link_with_cache)
    'download_link_cache': {},
    # Filled by PerformanceTimer: per-operation duration history / latest duration
    'performance_metrics': defaultdict(list),
    'operation_timings': defaultdict(float)
}

# Initialize session state: seed missing keys only, never overwrite existing values
for key, value in DEFAULT_SESSION_STATE.items():
    if key not in st.session_state:
        st.session_state[key] = value
110
 
111
+ # API Keys and Configuration
112
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
113
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
114
+ xai_key = os.getenv('xai', "")
115
+
116
  if 'OPENAI_API_KEY' in st.secrets:
117
  openai_api_key = st.secrets['OPENAI_API_KEY']
118
  if 'ANTHROPIC_API_KEY' in st.secrets:
 
123
  HF_KEY = os.getenv('HF_KEY')
124
  API_URL = os.getenv('API_URL')
125
 
126
+ # File type emojis for display
127
  FILE_EMOJIS = {
128
  "md": "๐Ÿ“",
129
  "mp3": "๐ŸŽต",
130
+ "wav": "๐Ÿ”Š",
131
+ "pdf": "๐Ÿ“„",
132
+ "txt": "๐Ÿ“‹",
133
+ "json": "๐Ÿ“Š",
134
+ "csv": "๐Ÿ“ˆ"
135
  }
136
 
137
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
138
+ # 2. PERFORMANCE MONITORING & TIMING
139
+ # ─────────────────────────────────────────────────────────
140
+
141
class PerformanceTimer:
    """Measure the wall-clock duration of a ``with`` block.

    On a clean exit the elapsed seconds are stored under ``operation_name``
    in ``st.session_state['operation_timings']`` (latest value) and appended
    to ``st.session_state['performance_metrics']`` (history).  When the block
    raises, nothing is recorded and the exception propagates unchanged.
    """

    def __init__(self, operation_name: str):
        self.start_time = None
        self.operation_name = operation_name

    def __enter__(self):
        # start_time is read by callers too (time.time() - timer.start_time),
        # so it stays a time.time() timestamp.
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # Failed operation: skip logging; returning None lets it propagate.
            return
        elapsed = time.time() - self.start_time
        label = self.operation_name
        st.session_state['operation_timings'][label] = elapsed
        st.session_state['performance_metrics'][label].append(elapsed)
156
+
157
def log_performance_metrics():
    """Render the recorded operation timings in the Streamlit sidebar.

    Shows the sum of the latest duration of every operation, a per-operation
    breakdown with its share of that sum, and a bar chart of the average
    duration per operation taken from the timing history.
    """
    st.sidebar.markdown("### โฑ๏ธ Performance Metrics")

    metrics = st.session_state['operation_timings']
    if not metrics:
        return

    total_time = sum(metrics.values())
    st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

    # Timing breakdown
    for operation, duration in metrics.items():
        # Every duration can be 0.0 (coarse clock / trivial work); avoid
        # dividing by a zero total in that case.
        percentage = (duration / total_time) * 100 if total_time > 0 else 0.0
        st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

    # Timing history chart: one bar per operation that has samples
    history_data = [
        {"Operation": op, "Avg Time (s)": sum(times) / len(times)}
        for op, times in st.session_state['performance_metrics'].items()
        if times
    ]
    if history_data:
        st.sidebar.markdown("### ๐Ÿ“ˆ Timing History")
        chart_data = pd.DataFrame(history_data)
        st.sidebar.bar_chart(chart_data.set_index("Operation"))
183
+
184
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
185
+ # 3. OPTIMIZED AUDIO GENERATION
186
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
187
+
188
def clean_for_speech(text: str) -> str:
    """Strip markdown, code, and URLs from *text* so it reads well as speech.

    Args:
        text: Raw (possibly markdown) text.

    Returns:
        The cleaned text with whitespace collapsed to single spaces and
        leading/trailing whitespace removed.
    """
    with PerformanceTimer("text_cleaning"):
        # Code must be removed while its backtick delimiters still exist;
        # stripping backticks first would leave the code in the output.
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)

        # Markdown structure
        text = re.sub(r'#+ ', '', text)  # headers
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # [label](url) -> label

        # URLs: parenthesised form first, otherwise the bare-URL pattern
        # swallows the closing ')' and leaves a dangling '('.
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        text = re.sub(r'https?://\S+', '', text)

        # Emphasis markers and model end-of-sequence tokens
        text = re.sub(r'[*_~`]', '', text)
        text = text.replace("</s>", " ")

        # Collapse whitespace last so gaps left by the removals are closed too
        text = re.sub(r'\s+', ' ', text)
        return text.strip()
212
+
213
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """Generate a speech file for *text* with Edge TTS, using a session cache.

    Args:
        text: Text to speak; it is passed through ``clean_for_speech`` first.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment, formatted as a signed percentage.
        pitch: Pitch adjustment, formatted as a signed Hz value.
        file_format: Extension used for the output filename.

    Returns:
        ``(filename, seconds)``.  ``filename`` is ``None`` when the cleaned
        text is empty or generation fails; ``seconds`` is 0 for cache hits,
        empty input, and failures.
    """
    import hashlib  # local import: only needed for the cache key

    with PerformanceTimer("tts_generation") as timer:
        # Clean and validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # Key on a digest of the *whole* text: keying on a prefix makes
        # different texts that share their opening characters collide.
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        cache_key = f"{digest}_{voice}_{rate}_{pitch}_{file_format}"
        cache = st.session_state['audio_cache']
        cached_file = cache.get(cache_key)
        # Only trust the cache while the file is still on disk
        if cached_file and os.path.exists(cached_file):
            return cached_file, 0

        try:
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # Timestamp plus a random suffix keeps filenames distinct
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            await communicate.save(filename)

            cache[cache_key] = filename
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"Error generating audio: {str(e)}")
            return None, 0
252
+
253
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """Save a question/answer pair as markdown and optionally as audio.

    Args:
        question: The user's question.
        answer: The answer text.
        voice: TTS voice; falls back to ``st.session_state['tts_voice']``.

    Returns:
        ``(md_file, audio_file, md_time, audio_time)``.  ``audio_file`` is
        ``None`` and ``audio_time`` is 0 when
        ``st.session_state['enable_audio']`` is falsy.
    """
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save"):
        # Save markdown (create_file builds the document text itself)
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        # Generate audio if enabled
        audio_file = None
        audio_time = 0
        if st.session_state['enable_audio']:
            audio_text = f"{question}\n\nAnswer: {answer}"
            audio_file, audio_time = await async_edge_tts_generate(
                audio_text,
                voice=voice,
                file_format=st.session_state['audio_format']
            )

        return md_file, audio_file, md_time, audio_time
280
+
281
def create_download_link_with_cache(
    file_path: str,
    file_type: str = "mp3"
) -> str:
    """Return an HTML download anchor embedding *file_path* as a data URI.

    Links are cached in ``st.session_state['download_link_cache']``.

    Args:
        file_path: Path of the file to embed.
        file_type: ``"mp3"``, ``"wav"`` or ``"md"`` select a matching MIME
            type and icon; anything else is served as a generic binary.

    Returns:
        The anchor markup, or ``""`` if the link could not be created (the
        error is reported with ``st.error``).
    """
    import html  # local import: used only to escape the filename

    with PerformanceTimer("download_link_generation"):
        # file_type is part of the key: it selects the MIME type in the link,
        # so the same path requested as another type needs its own entry.
        cache_key = f"dl_{file_type}_{file_path}"
        cache = st.session_state['download_link_cache']
        if cache_key in cache:
            return cache[cache_key]

        link_styles = {
            "mp3": ("audio/mpeg", "๐ŸŽต"),
            "wav": ("audio/wav", "๐Ÿ”Š"),
            "md": ("text/markdown", "๐Ÿ“"),
        }

        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()

            mime, icon = link_styles.get(
                file_type, ("application/octet-stream", "โฌ‡๏ธ")
            )
            # Escape the name so quotes or angle brackets cannot break the
            # attribute or inject markup; ordinary names are unchanged.
            filename = html.escape(os.path.basename(file_path), quote=True)
            link = (
                f'<a href="data:{mime};base64,{b64}" '
                f'download="{filename}">{icon} Download {filename}</a>'
            )

            cache[cache_key] = link
            return link

        except Exception as e:
            st.error(f"Error creating download link: {str(e)}")
            return ""
314
+
315
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
316
+ # 4. PAPER PROCESSING & DISPLAY
317
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
318
+
319
def parse_arxiv_refs(ref_text: str) -> List[Dict[str, str]]:
    """Turn the reference text returned by the Arxiv search into paper dicts.

    A line holding exactly two ``|`` characters opens a new paper
    (``date | title | ...``).  The first following line fills ``authors``;
    later lines are joined with spaces into ``summary``.  At most 20 papers
    are returned.  Falsy input yields an empty list.
    """
    if not ref_text:
        return []

    with PerformanceTimer("parse_refs"):
        papers = []
        paper = {}

        for line_no, raw in enumerate(ref_text.split('\n')):
            try:
                if raw.count('|') == 2:
                    # Header line: flush the paper collected so far
                    if paper:
                        papers.append(paper)
                        if len(papers) >= 20:  # Limit to 20 papers
                            break

                    fields = raw.strip('* ').split('|')
                    found = re.search(r'(https://arxiv.org/\S+)', raw)
                    paper = {
                        'date': fields[0].strip(),
                        'title': fields[1].strip(),
                        # Placeholder id when the header carries no arxiv link
                        'url': found.group(1) if found else f"paper_{len(papers)}",
                        'authors': '',
                        'summary': '',
                        'full_audio': None,
                        'download_base64': '',
                    }

                elif paper:
                    # Continuation line for the paper being collected
                    content = raw.strip('* ')
                    if not paper['authors']:
                        paper['authors'] = content
                    elif paper['summary']:
                        paper['summary'] += ' ' + content
                    else:
                        paper['summary'] = content

            except Exception as e:
                st.warning(f"Error parsing line {line_no}: {str(e)}")
                continue

        # Flush the last paper; the slice below enforces the 20-paper cap
        if paper:
            papers.append(paper)

        return papers[:20]
375
+
376
async def create_paper_audio_files(papers: List[Dict], input_question: str):
    """Attach a generated audio file (and optional download link) to each paper.

    Works in two passes: first a TTS coroutine is prepared for every paper,
    then the coroutines are awaited one after another in list order.  Each
    paper dict is updated in place (``full_audio``, ``download_base64``).
    ``input_question`` is accepted but not used here.
    """
    with PerformanceTimer("paper_audio_generation"):
        pending = []

        # Pass 1: build the narration text and the (not yet started) coroutine
        for paper in papers:
            try:
                narration = clean_for_speech(
                    f"{paper['title']} by {paper['authors']}. {paper['summary']}"
                )
                job = async_edge_tts_generate(
                    narration,
                    voice=st.session_state['tts_voice'],
                    file_format=st.session_state['audio_format']
                )
                pending.append((paper, job))

            except Exception as e:
                st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}")
                continue

        # Pass 2: await each coroutine in turn and record its result
        for paper, job in pending:
            try:
                audio_file, gen_time = await job
                if not audio_file:
                    continue
                paper['full_audio'] = audio_file
                if st.session_state['enable_download']:
                    paper['download_base64'] = create_download_link_with_cache(
                        audio_file,
                        st.session_state['audio_format']
                    )
            except Exception as e:
                st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
                paper['full_audio'] = None
                paper['download_base64'] = ''
413
+
414
def display_papers(papers: List[Dict], marquee_settings: Dict):
    """Render the papers in the main page as a list view and a grid view.

    Args:
        papers: Paper dicts; this function reads the keys ``title``,
            ``authors``, ``date``, ``url``, ``summary``, ``full_audio`` and
            ``download_base64``.
        marquee_settings: Settings passed through to ``display_marquee``.
    """
    # NOTE(review): block nesting and the whitespace inside the multi-line
    # f-strings are reconstructed from a flattened listing — confirm against
    # the real file.
    with PerformanceTimer("paper_display"):
        st.write("## ๐Ÿ“š Research Papers")

        # Two tabs showing the same papers in different layouts
        tab1, tab2 = st.tabs(["๐Ÿ“‹ List View", "๐Ÿ“Š Grid View"])

        with tab1:
            for i, paper in enumerate(papers, start=1):
                # Scrolling banner with title and (truncated) authors
                marquee_text = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors'][:120]}"
                display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")

                # Paper details expander
                with st.expander(f"{i}. ๐Ÿ“„ {paper['title']}", expanded=True):
                    # PDF link is derived by swapping /abs/ for /pdf/ in the URL
                    pdf_url = paper['url'].replace('/abs/', '/pdf/')

                    # Display paper information
                    st.markdown(f"""
                    **Date:** {paper['date']}
                    **Title:** {paper['title']}
                    **Links:** ๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})
                    """)
                    st.markdown(f"**Authors:** {paper['authors']}")
                    st.markdown(f"**Summary:** {paper['summary']}")

                    # Audio player and download link, only when audio exists
                    if paper.get('full_audio'):
                        st.write("๐ŸŽง Paper Audio Summary")
                        st.audio(paper['full_audio'])
                        if paper['download_base64']:
                            st.markdown(paper['download_base64'], unsafe_allow_html=True)

        with tab2:
            # Three-column grid; papers are dealt out round-robin
            cols = st.columns(3)
            for i, paper in enumerate(papers):
                with cols[i % 3]:
                    st.markdown(f"""
                    ### ๐Ÿ“„ {paper['title'][:50]}...
                    **Date:** {paper['date']}
                    [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')})
                    """)
                    if paper.get('full_audio'):
                        st.audio(paper['full_audio'])
461
+
462
def display_papers_in_sidebar(papers: List[Dict]):
    """List the papers in the sidebar with date/text filters and on-demand audio.

    Args:
        papers: Paper dicts; this function reads the keys ``title``,
            ``authors``, ``date``, ``url``, ``summary``, ``full_audio`` and
            ``download_base64``.
    """
    # NOTE(review): block nesting is reconstructed from a flattened listing —
    # confirm against the real file.
    with PerformanceTimer("sidebar_display"):
        st.sidebar.title("๐Ÿ“š Papers Overview")

        # Filter widgets
        filter_date = st.sidebar.date_input("Filter by date:", None)
        search_term = st.sidebar.text_input("Search papers:", "")

        # Apply the filters that have a value
        filtered_papers = papers
        if filter_date:
            # Keeps papers whose date string contains the picked date as YYYY-MM-DD
            filtered_papers = [p for p in filtered_papers
                               if filter_date.strftime("%Y-%m-%d") in p['date']]
        if search_term:
            # Case-insensitive match against title or authors
            search_lower = search_term.lower()
            filtered_papers = [p for p in filtered_papers
                               if search_lower in p['title'].lower()
                               or search_lower in p['authors'].lower()]

        # Display filtered papers
        for i, paper in enumerate(filtered_papers, start=1):
            # Per-paper session flag: True once the user asked to load audio
            paper_key = f"paper_{paper['url']}"
            if paper_key not in st.session_state:
                st.session_state[paper_key] = False

            with st.sidebar.expander(f"{i}. {paper['title'][:50]}...", expanded=False):
                # Paper metadata
                st.markdown(f"**Date:** {paper['date']}")

                # Links (PDF URL derived by swapping /abs/ for /pdf/)
                pdf_url = paper['url'].replace('/abs/', '/pdf/')
                st.markdown(f"๐Ÿ“„ [Abstract]({paper['url']}) | ๐Ÿ“‘ [PDF]({pdf_url})")

                # Truncated preview of authors and summary
                st.markdown(f"**Authors:** {paper['authors'][:100]}...")
                if paper['summary']:
                    st.markdown(f"**Summary:** {paper['summary'][:200]}...")

                # Audio controls: the player appears only after the button sets the flag
                if paper['full_audio']:
                    if st.button("๐ŸŽต Load Audio", key=f"btn_{paper_key}"):
                        st.session_state[paper_key] = True

                    if st.session_state[paper_key]:
                        st.audio(paper['full_audio'])
                        if paper['download_base64']:
                            st.markdown(paper['download_base64'], unsafe_allow_html=True)
510
+
511
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
512
+ # 5. FILE MANAGEMENT & HISTORY
513
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
514
 
515
def create_file(prompt: str, response: str, file_type: str = "md") -> str:
    """Write *prompt* and *response* to a new file under ``generated_files``.

    The filename comes from ``generate_filename``.  Markdown files get
    ``# Query`` / ``# Response`` headings; other types get the two texts
    separated by a blank line.

    Returns:
        The path of the written file, or ``""`` when anything fails (the
        error is reported with ``st.error``).
    """
    with PerformanceTimer("file_creation"):
        try:
            name = generate_filename(prompt.strip(), response.strip(), file_type)

            # Make sure the output directory is there before writing
            out_dir = "generated_files"
            os.makedirs(out_dir, exist_ok=True)
            filepath = os.path.join(out_dir, name)

            if file_type == "md":
                document = f"# Query\n{prompt}\n\n# Response\n{response}"
            else:
                document = f"{prompt}\n\n{response}"

            with open(filepath, 'w', encoding='utf-8') as out:
                out.write(document)

            return filepath

        except Exception as e:
            st.error(f"Error creating file: {str(e)}")
            return ""
538
+
539
def get_high_info_terms(text: str, top_n: int = 10) -> List[str]:
    """Return the most frequent informative words and bi-grams in *text*.

    Text is lower-cased and split into words (hyphenated words count as one).
    A single word is kept when it is not a stop word and is longer than three
    characters.  A bi-gram is kept only when neither of its words is a stop
    word, so filler pairs such as "of the" are not reported as terms.

    Args:
        text: Text to analyse.
        top_n: Maximum number of terms to return.

    Returns:
        Up to ``top_n`` terms, most frequent first; ties keep first-seen
        order with single words ahead of bi-grams.
    """
    # Common English stop words to filter out
    stop_words = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
        'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over',
        'after', 'this', 'that', 'these', 'those', 'what', 'which',
    }

    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())

    unigrams = [w for w in words if w not in stop_words and len(w) > 3]
    # Test the component words: the joined bi-gram string itself can never
    # equal a single stop word, so checking it filters nothing.
    bi_grams = [
        f"{first} {second}"
        for first, second in zip(words, words[1:])
        if first not in stop_words and second not in stop_words
    ]
    bi_grams = [term for term in bi_grams if len(term) > 3]

    counter = Counter(unigrams + bi_grams)
    return [term for term, _ in counter.most_common(top_n)]
562
 
563
def clean_text_for_filename(text: str) -> str:
    """Reduce *text* to an underscore-joined slug usable in a filename.

    The text is lower-cased, characters other than word characters,
    whitespace and hyphens are dropped, words of three characters or fewer
    and a fixed list of unhelpful words are removed, and the result is
    capped at 200 characters.
    """
    unhelpful = {
        'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which',
        'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai',
        'library', 'function', 'method', 'class', 'object', 'variable',
    }

    stripped = re.sub(r'[^\w\s-]', '', text.lower())

    kept = []
    for word in stripped.split():
        if len(word) <= 3 or word in unhelpful:
            continue
        kept.append(word)

    slug = '_'.join(kept)
    return slug[:200]
580
 
581
+ def generate_filename(prompt: str, response: str, file_type: str = "md",
582
+ max_length: int = 200) -> str:
583
+ """Generate descriptive filename from content."""
584
+ # Get timestamp prefix
 
 
 
 
585
  prefix = format_timestamp_prefix() + "_"
586
+
587
+ # Extract informative terms
588
+ combined_text = (prompt + " " + response)[:500]
589
+ info_terms = get_high_info_terms(combined_text, top_n=5)
590
+
591
+ # Get content snippet
592
  snippet = (prompt[:40] + " " + response[:40]).strip()
593
  snippet_cleaned = clean_text_for_filename(snippet)
594
 
595
+ # Combine and deduplicate parts
596
  name_parts = info_terms + [snippet_cleaned]
597
  seen = set()
598
  unique_parts = []
 
601
  seen.add(part)
602
  unique_parts.append(part)
603
 
604
+ # Create final filename
605
  full_name = '_'.join(unique_parts).strip('_')
606
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
607
  if len(full_name) > leftover_chars:
 
609
 
610
  return f"{prefix}{full_name}.{file_type}"
611
 
612
def create_zip_of_files(md_files: List[str], mp3_files: List[str],
                        wav_files: List[str], input_question: str) -> Optional[str]:
    """Bundle the given markdown and audio files into one zip archive.

    ``readme.md`` (any case) and empty markdown files are left out.  The
    archive name is built from a timestamp plus up to five high-information
    terms taken from the markdown contents, the audio file names and
    *input_question*.  Files are stored under their base names.

    Returns:
        The archive filename, or ``None`` when there is nothing to archive
        or archiving fails (the error is reported with ``st.error``).
    """
    with PerformanceTimer("zip_creation"):
        # Drop the readme and zero-byte markdown files
        usable_md = []
        for path in md_files:
            if os.path.basename(path).lower() == 'readme.md':
                continue
            if os.path.getsize(path) > 0:
                usable_md.append(path)

        all_files = usable_md + mp3_files + wav_files
        if not all_files:
            return None

        try:
            # Gather text from which the archive name is derived
            name_sources = []
            for path in all_files:
                if path.endswith('.md'):
                    with open(path, 'r', encoding='utf-8') as handle:
                        name_sources.append(handle.read())
                elif path.endswith(('.mp3', '.wav')):
                    stem = os.path.splitext(os.path.basename(path))[0]
                    name_sources.append(stem.replace('_', ' '))
            name_sources.append(input_question)

            info_terms = get_high_info_terms(" ".join(name_sources), top_n=10)

            timestamp = format_timestamp_prefix()
            name_text = '-'.join(info_terms[:5])
            zip_name = f"archive_{timestamp}_{name_text[:50]}.zip"

            # Write every file under its base name
            with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as archive:
                for path in all_files:
                    archive.write(path, os.path.basename(path))

            return zip_name

        except Exception as e:
            st.error(f"Error creating zip archive: {str(e)}")
            return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
 
655
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
656
+ # 6. OPTIMIZED AI LOOKUP & PROCESSING
657
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
658
 
659
def perform_ai_lookup(q: str, vocal_summary: bool = True,
                      extended_refs: bool = False,
                      titles_summary: bool = True,
                      full_audio: bool = False) -> Tuple[str, Dict[str, float]]:
    """Answer *q* with Claude, save the result, then run the Arxiv paper search.

    Steps, all rendered into the Streamlit page as they run:
      1. Ask Claude (when ``st.session_state['enable_claude']`` is set).
      2. Save the question/answer as markdown and, if enabled, as audio.
      3. Query the Arxiv RAG service, parse the references, and display the
         papers in the page and the sidebar (same ``enable_claude`` gate).

    Args:
        q: The query text.
        vocal_summary, extended_refs, titles_summary, full_audio: accepted
            for caller compatibility; not read anywhere in this function.

    Returns:
        ``(result, timings)`` where ``result`` is Claude's reply text (``""``
        when Claude is disabled, or an error message string when the call
        failed) and ``timings`` maps step names to seconds.
    """
    # NOTE(review): block nesting is reconstructed from a flattened listing —
    # confirm against the real file.
    with PerformanceTimer("total_lookup") as total_timer:
        timings = {}

        # Add operation controls if not present.
        # NOTE(review): the 'operation_controls' flag is stored in session
        # state, so these checkboxes look like they are only created on the
        # first call of a session — confirm that is intended.
        if 'operation_controls' not in st.session_state:
            st.sidebar.markdown("### ๐Ÿ”ง Operation Controls")
            st.session_state['enable_claude'] = st.sidebar.checkbox(
                "Enable Claude Search",
                value=st.session_state['enable_claude']
            )
            st.session_state['enable_audio'] = st.sidebar.checkbox(
                "Generate Audio",
                value=st.session_state['enable_audio']
            )
            st.session_state['enable_download'] = st.sidebar.checkbox(
                "Create Download Links",
                value=st.session_state['enable_download']
            )
            st.session_state['operation_controls'] = True

        result = ""

        # 1. Claude API (if enabled)
        if st.session_state['enable_claude']:
            with PerformanceTimer("claude_api") as claude_timer:
                try:
                    client = anthropic.Anthropic(api_key=anthropic_key)
                    response = client.messages.create(
                        model="claude-3-sonnet-20240229",
                        max_tokens=1000,
                        messages=[{"role": "user", "content": q}]
                    )
                    st.write("Claude's reply ๐Ÿง :")
                    st.markdown(response.content[0].text)
                    result = response.content[0].text
                    timings['claude_api'] = time.time() - claude_timer.start_time
                except Exception as e:
                    # The failure is shown to the user; the placeholder text
                    # below still flows into the save step that follows.
                    st.error(f"Error with Claude API: {str(e)}")
                    result = "Error occurred during Claude API call"
                    timings['claude_api'] = 0

        # 2. Async save and audio generation
        async def process_results():
            # Closure over q, result and timings from the enclosing call
            with PerformanceTimer("results_processing") as proc_timer:
                md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(
                    q, result
                )
                timings['markdown_save'] = md_time
                timings['audio_generation'] = audio_time

                if audio_file and st.session_state['enable_audio']:
                    st.subheader("๐Ÿ“ Main Response Audio")
                    st.audio(audio_file)

                    if st.session_state['enable_download']:
                        st.markdown(
                            create_download_link_with_cache(
                                audio_file,
                                st.session_state['audio_format']
                            ),
                            unsafe_allow_html=True
                        )

        # Run async operations to completion before continuing
        asyncio.run(process_results())

        # 3. Arxiv RAG with performance tracking
        if st.session_state['enable_claude']:
            with PerformanceTimer("arxiv_rag") as rag_timer:
                try:
                    st.write('Running Arxiv RAG with Claude inputs.')
                    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
                    # Only the first element of the prediction result is used
                    refs = client.predict(
                        q,
                        10,
                        "Semantic Search",
                        "mistralai/Mixtral-8x7B-Instruct-v0.1",
                        api_name="/update_with_rag_md"
                    )[0]
                    timings['arxiv_rag'] = time.time() - rag_timer.start_time

                    # Process papers asynchronously
                    papers = parse_arxiv_refs(refs)
                    if papers:
                        with PerformanceTimer("paper_processing") as paper_timer:
                            async def process_papers():
                                # Create minimal links page and save it as markdown
                                paper_links = create_paper_links_md(papers)
                                links_file = create_file(q, paper_links, "md")
                                st.markdown(paper_links)

                                # Generate audio and display papers
                                await create_paper_audio_files(papers, q)
                                display_papers(papers, get_marquee_settings())
                                display_papers_in_sidebar(papers)

                            asyncio.run(process_papers())
                            timings['paper_processing'] = time.time() - paper_timer.start_time
                    else:
                        st.warning("No papers found in the response.")
                except Exception as e:
                    st.error(f"Error during Arxiv RAG: {str(e)}")
                    timings['arxiv_rag'] = 0

        return result, timings
769
+
770
def process_voice_input(text: str):
    """Look up *text* with the AI pipeline, save the answer, and show the files.

    Shows a warning and returns immediately when *text* is empty. Otherwise
    the lookup result is saved through ``save_qa_with_audio`` and the
    resulting markdown / audio files are presented in two columns. Any
    exception raised along the way is reported with ``st.error`` instead of
    propagating.
    """
    # Guard clause: nothing to look up.
    if not text:
        st.warning("Please provide some input text.")
        return

    # Fixed lookup options used for voice-driven queries.
    lookup_options = dict(
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True,
    )

    with PerformanceTimer("voice_processing"):
        try:
            st.subheader("๐Ÿ” Search Results")
            # The timing dict returned by the lookup is not used here.
            result, _timings = perform_ai_lookup(text, **lookup_options)

            # Persist the question/answer pair as markdown plus audio.
            md_file, audio_file = save_qa_with_audio(text, result)

            st.subheader("๐Ÿ“ Generated Files")
            markdown_col, audio_col = st.columns(2)

            with markdown_col:
                st.write(f"๐Ÿ“„ Markdown: {os.path.basename(md_file)}")
                st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True)

            with audio_col:
                # Audio is optional; only show the player when a file exists.
                if audio_file:
                    st.write(f"๐ŸŽต Audio: {os.path.basename(audio_file)}")
                    play_and_download_audio(
                        audio_file,
                        st.session_state['audio_format'],
                    )

        except Exception as exc:
            st.error(f"Error processing voice input: {exc}")
807
 
808
# ─────────────────────────────────────────────────────────
809
+ # 7. SIDEBAR AND FILE HISTORY
810
# ─────────────────────────────────────────────────────────
811
 
812
def display_file_history_in_sidebar():
    """Render the file-history panel in the sidebar.

    Collects the ``*.md``, ``*.mp3`` and ``*.wav`` files in the current
    working directory and offers:

    * "Delete All" - removes every collected file and sets
      ``st.session_state.should_rerun`` so the caller can refresh the page;
    * "Zip All" - bundles the files via ``create_zip_of_files`` and shows a
      download link for the archive;
    * a name / type filter;
    * one expander per remaining file with its modification time, size, a
      preview or audio player, and a download link.

    Files are listed newest first (by modification time).
    """
    with PerformanceTimer("file_history"):
        st.sidebar.markdown("---")
        st.sidebar.markdown("### ๐Ÿ“‚ File History")

        # Gather all files
        md_files = glob.glob("*.md")
        mp3_files = glob.glob("*.mp3")
        wav_files = glob.glob("*.wav")
        all_files = md_files + mp3_files + wav_files

        if not all_files:
            st.sidebar.write("No files found.")
            return

        # File management controls
        col1, col2 = st.sidebar.columns(2)
        with col1:
            if st.button("๐Ÿ—‘ Delete All"):
                try:
                    for f in all_files:
                        os.remove(f)
                    st.session_state.should_rerun = True
                    st.success("All files deleted successfully.")
                except Exception as e:
                    st.error(f"Error deleting files: {str(e)}")

        with col2:
            if st.button("โฌ‡๏ธ Zip All"):
                zip_name = create_zip_of_files(
                    md_files,
                    mp3_files,
                    wav_files,
                    st.session_state.get('last_query', '')
                )
                if zip_name:
                    st.sidebar.markdown(
                        get_download_link(zip_name, "zip"),
                        unsafe_allow_html=True
                    )

        # File filtering options
        st.sidebar.markdown("### ๐Ÿ” Filter Files")
        file_search = st.sidebar.text_input("Search files:", "")
        file_type_filter = st.sidebar.multiselect(
            "File types:",
            ["Markdown", "Audio"],
            default=["Markdown", "Audio"]
        )

        # "Delete All" may have just removed some or all of the collected
        # files. Drop paths that no longer exist so the getmtime/getsize
        # calls below do not raise FileNotFoundError on the stale list.
        all_files = [f for f in all_files if os.path.exists(f)]

        # Sort files by modification time, newest first
        all_files.sort(key=os.path.getmtime, reverse=True)

        # Translate the selected type labels into the extensions they cover
        allowed_exts = set()
        if "Markdown" in file_type_filter:
            allowed_exts.add(".md")
        if "Audio" in file_type_filter:
            allowed_exts.update((".mp3", ".wav"))

        # Keep files whose name contains the search text (case-insensitive)
        # and whose extension belongs to a selected type
        needle = file_search.lower()
        filtered_files = [
            f for f in all_files
            if needle in f.lower()
            and os.path.splitext(f)[1].lower() in allowed_exts
        ]

        # Display filtered files
        for f in filtered_files:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            emoji = FILE_EMOJIS.get(ext, '๐Ÿ“ฆ')

            # File metadata
            mod_time = datetime.fromtimestamp(os.path.getmtime(f))
            time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S")
            file_size = os.path.getsize(f) / 1024  # Size in KB

            with st.sidebar.expander(f"{emoji} {fname}"):
                st.write(f"**Modified:** {time_str}")
                st.write(f"**Size:** {file_size:.1f} KB")

                if ext == "md":
                    try:
                        with open(f, "r", encoding="utf-8") as file_in:
                            # Preview only the first 200 characters
                            snippet = file_in.read(200).replace("\n", " ")
                            if len(snippet) == 200:
                                snippet += "..."
                            st.write(snippet)
                            st.markdown(
                                get_download_link(f, file_type="md"),
                                unsafe_allow_html=True
                            )
                    except Exception as e:
                        st.error(f"Error reading markdown file: {str(e)}")

                elif ext in ["mp3", "wav"]:
                    st.audio(f)
                    st.markdown(
                        get_download_link(f, file_type=ext),
                        unsafe_allow_html=True
                    )

                else:
                    # Defensive fallback: the filter above only admits
                    # md/mp3/wav, so this branch is not expected to run.
                    st.markdown(get_download_link(f), unsafe_allow_html=True)
913
 
914
# ─────────────────────────────────────────────────────────
915
+ # 8. MAIN APPLICATION
916
# ─────────────────────────────────────────────────────────
917
 
918
def main():
    """Build the whole page: marquee, input component, tabs, and sidebar.

    Any exception raised while building the page is reported with
    ``st.error`` together with a hint to refresh.
    """
    try:
        # 1. Marquee settings UI in the sidebar, then the current settings
        update_marquee_settings_ui()
        marquee_settings = get_marquee_settings()

        # 2. Welcome marquee, rendered larger than the default settings
        welcome_content = st.session_state['marquee_content']
        welcome_style = {**marquee_settings, "font-size": "28px", "lineHeight": "50px"}
        display_marquee(welcome_content, welcome_style, key_suffix="welcome")

        # 3. Main action selector
        tab_choices = ["๐ŸŽค Voice", "๐Ÿ“ธ Media", "๐Ÿ” ArXiv", "๐Ÿ“ Editor"]
        tab_main = st.radio("Action:", tab_choices, horizontal=True)

        # Custom component that supplies the input value
        mycomponent = components.declare_component("mycomponent", path="mycomponent")
        val = mycomponent(my_input_value="Hello")

        if val:
            # Let the user edit the captured text before it is submitted
            edited_input = st.text_area(
                "โœ๏ธ Edit Input:",
                value=val.replace('\\n', ' '),
                height=100,
            )

            # Model selector (single choice; the selection is not read)
            st.selectbox("Model:", ["Arxiv"])

            autorun_col, audio_col = st.columns(2)
            with autorun_col:
                autorun = st.checkbox("โš™ AutoRun", value=True)
            with audio_col:
                full_audio = st.checkbox("๐Ÿ“š FullAudio", value=False)

            # Has the component delivered a value different from the last run?
            input_changed = val != st.session_state.old_val

            # Run automatically on fresh input; otherwise show a manual Run
            # button. The short-circuit `or` keeps the button from being
            # rendered when the automatic run fires.
            if (autorun and input_changed) or st.button("โ–ถ Run"):
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                result, timings = perform_ai_lookup(
                    edited_input,
                    vocal_summary=True,
                    extended_refs=False,
                    titles_summary=True,
                    full_audio=full_audio,
                )

                # Display performance metrics
                display_performance_metrics(timings)

        # Tab-specific content
        if tab_main == "๐Ÿ” ArXiv":
            display_arxiv_tab()
        elif tab_main == "๐ŸŽค Voice":
            display_voice_tab()
        elif tab_main == "๐Ÿ“ธ Media":
            display_media_tab()
        elif tab_main == "๐Ÿ“ Editor":
            display_editor_tab()

        # Sidebar file history, then page styling
        display_file_history_in_sidebar()
        apply_custom_styling()

        # Honour a rerun requested earlier in this pass; clear the flag first
        if st.session_state.should_rerun:
            st.session_state.should_rerun = False
            st.rerun()

    except Exception as exc:
        st.error(f"An error occurred in the main application: {exc}")
        st.info("Please try refreshing the page or contact support if the issue persists.")
1020
 
1021
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()