awacke1 committed on
Commit
f47b1b9
Β·
verified Β·
1 Parent(s): dde3eb8

Rename backup16.app.py to backup17.app.py

Browse files
Files changed (1) hide show
  1. backup16.app.py β†’ backup17.app.py +318 -396
backup16.app.py β†’ backup17.app.py RENAMED
@@ -21,7 +21,9 @@ import asyncio
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
23
 
24
- # 🎯 1. Core Configuration & Setup
 
 
25
  st.set_page_config(
26
  page_title="🚲TalkingAIResearcherπŸ†",
27
  page_icon="πŸš²πŸ†",
@@ -35,7 +37,7 @@ st.set_page_config(
35
  )
36
  load_dotenv()
37
 
38
- # Add available English voices for Edge TTS
39
  EDGE_TTS_VOICES = [
40
  "en-US-AriaNeural",
41
  "en-US-GuyNeural",
@@ -48,50 +50,63 @@ EDGE_TTS_VOICES = [
48
  "en-CA-LiamNeural"
49
  ]
50
 
51
- # Initialize session state variables
52
  if 'marquee_settings' not in st.session_state:
53
- # Default to 20s animationDuration instead of 10s:
54
  st.session_state['marquee_settings'] = {
55
  "background": "#1E1E1E",
56
  "color": "#FFFFFF",
57
  "font-size": "14px",
58
- "animationDuration": "20s", # <- changed to 20s
59
  "width": "100%",
60
  "lineHeight": "35px"
61
  }
62
 
63
  if 'tts_voice' not in st.session_state:
64
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
 
65
  if 'audio_format' not in st.session_state:
66
  st.session_state['audio_format'] = 'mp3'
 
67
  if 'transcript_history' not in st.session_state:
68
  st.session_state['transcript_history'] = []
 
69
  if 'chat_history' not in st.session_state:
70
  st.session_state['chat_history'] = []
 
71
  if 'openai_model' not in st.session_state:
72
  st.session_state['openai_model'] = "gpt-4o-2024-05-13"
 
73
  if 'messages' not in st.session_state:
74
  st.session_state['messages'] = []
 
75
  if 'last_voice_input' not in st.session_state:
76
  st.session_state['last_voice_input'] = ""
 
77
  if 'editing_file' not in st.session_state:
78
  st.session_state['editing_file'] = None
 
79
  if 'edit_new_name' not in st.session_state:
80
  st.session_state['edit_new_name'] = ""
 
81
  if 'edit_new_content' not in st.session_state:
82
  st.session_state['edit_new_content'] = ""
 
83
  if 'viewing_prefix' not in st.session_state:
84
  st.session_state['viewing_prefix'] = None
 
85
  if 'should_rerun' not in st.session_state:
86
  st.session_state['should_rerun'] = False
 
87
  if 'old_val' not in st.session_state:
88
  st.session_state['old_val'] = None
 
89
  if 'last_query' not in st.session_state:
90
  st.session_state['last_query'] = ""
 
91
  if 'marquee_content' not in st.session_state:
92
  st.session_state['marquee_content'] = "πŸš€ Welcome to TalkingAIResearcher | πŸ€– Your Research Assistant"
93
 
94
- # πŸ”‘ 2. API Setup & Clients
95
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
96
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
97
  xai_key = os.getenv('xai',"")
@@ -105,43 +120,44 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
105
  HF_KEY = os.getenv('HF_KEY')
106
  API_URL = os.getenv('API_URL')
107
 
108
- # Constants
109
  FILE_EMOJIS = {
110
  "md": "πŸ“",
111
  "mp3": "🎡",
112
  "wav": "πŸ”Š"
113
  }
114
 
 
 
 
 
115
def get_central_time():
    """Return the current datetime localized to the US Central timezone."""
    return datetime.now(pytz.timezone('US/Central'))
119
 
120
def format_timestamp_prefix():
    """Build a filename timestamp prefix shaped MM_dd_yy_hh_mm_AM/PM (US Central)."""
    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
124
 
125
def initialize_marquee_settings():
    """Seed the default marquee display settings into session state if absent."""
    defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
    if 'marquee_settings' not in st.session_state:
        st.session_state['marquee_settings'] = defaults
136
 
137
def get_marquee_settings():
    """Return the marquee settings dict, initializing defaults on first access."""
    initialize_marquee_settings()
    return st.session_state['marquee_settings']
141
 
142
  def update_marquee_settings_ui():
143
- """Update marquee settings via UI controls"""
144
- initialize_marquee_settings()
145
  st.sidebar.markdown("### 🎯 Marquee Settings")
146
  cols = st.sidebar.columns(2)
147
  with cols[0]:
@@ -153,7 +169,6 @@ def update_marquee_settings_ui():
153
  key="text_color_picker")
154
  with cols[1]:
155
  font_size = st.slider("πŸ“ Size", 10, 24, 14, key="font_size_slider")
156
- # The default is now 20, not 10
157
  duration = st.slider("⏱️ Speed", 1, 20, 20, key="duration_slider")
158
 
159
  st.session_state['marquee_settings'].update({
@@ -164,7 +179,7 @@ def update_marquee_settings_ui():
164
  })
165
 
166
  def display_marquee(text, settings, key_suffix=""):
167
- """Display marquee with given text and settings"""
168
  truncated_text = text[:280] + "..." if len(text) > 280 else text
169
  streamlit_marquee(
170
  content=truncated_text,
@@ -174,6 +189,7 @@ def display_marquee(text, settings, key_suffix=""):
174
  st.write("")
175
 
176
  def get_high_info_terms(text: str, top_n=10) -> list:
 
177
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
178
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
179
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
@@ -183,71 +199,56 @@ def get_high_info_terms(text: str, top_n=10) -> list:
183
  return [term for term, freq in counter.most_common(top_n)]
184
 
185
def clean_text_for_filename(text: str) -> str:
    """Sanitize free text into a safe underscore-joined filename fragment.

    Lowercases, strips punctuation (keeping word chars, whitespace, hyphens),
    drops short/stop words, and caps the result at 200 characters.
    """
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    words = text.split()
    # Restored: this definition was dropped in the diff while the filter
    # below still references it, which would raise NameError.
    stop_short = set(['the', 'and', 'for', 'with', 'this', 'that'])
    filtered = [w for w in words if len(w) > 3 and w not in stop_short]
    return '_'.join(filtered)[:200]
192
 
193
-
194
-
195
def generate_filename(prompt, response, file_type="md", max_length=200):
    """
    Generate a shortened, timestamp-prefixed filename.

    Steps: extract high-info terms from a capped prompt+response sample,
    build a small cleaned snippet, join the parts, then truncate so the
    whole name (prefix + body + extension) fits max_length.
    """
    prefix = format_timestamp_prefix() + "_"
    # Restored: these two assignments were dropped in the diff while still
    # referenced below, which would raise NameError.
    combined_text = (prompt + " " + response)[:200]  # cap huge text input
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet = (prompt[:40] + " " + response[:40]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    name_parts = info_terms + [snippet_cleaned]
    full_name = '_'.join(name_parts).strip('_')

    # Reserve room for the timestamp prefix, the dot, and the extension.
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(full_name) > leftover_chars:
        full_name = full_name[:leftover_chars]

    return f"{prefix}{full_name}.{file_type}"
215
 
216
-
217
def create_file(prompt, response, file_type="md"):
    """Persist prompt and response to disk, named via generate_filename().

    The file contains the prompt, a blank line, then the response.
    Returns the filename written.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename
249
 
250
  def get_download_link(file, file_type="zip"):
 
 
 
251
  with open(file, "rb") as f:
252
  b64 = base64.b64encode(f.read()).decode()
253
  if file_type == "zip":
@@ -262,6 +263,7 @@ def get_download_link(file, file_type="zip"):
262
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
263
 
264
  def clean_for_speech(text: str) -> str:
 
265
  text = text.replace("\n", " ")
266
  text = text.replace("</s>", " ")
267
  text = text.replace("#", "")
@@ -270,6 +272,7 @@ def clean_for_speech(text: str) -> str:
270
  return text
271
 
272
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
 
273
  text = clean_for_speech(text)
274
  if not text.strip():
275
  return None
@@ -281,83 +284,40 @@ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=
281
  return out_fn
282
 
283
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper that drives the async Edge-TTS generator to completion."""
    coro = edge_tts_generate_audio(text, voice, rate, pitch, file_format)
    return asyncio.run(coro)
285
 
286
def play_and_download_audio(file_path, file_type="mp3"):
    """Embed an audio player plus a download link for an existing audio file.

    Silently does nothing when the path is falsy or the file is missing.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type),
                unsafe_allow_html=True)
291
 
292
def save_qa_with_audio(question, answer, voice=None):
    """Save a Q&A pair to markdown and synthesize an audio narration.

    Falls back to the session-state TTS voice when none is given.
    Returns (md_file, audio_file) paths.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    # NOTE(review): the original also built a markdown-formatted
    # `combined_text` string here but never used it (create_file receives
    # the raw question/answer) — removed as dead code; confirm it was not
    # intended to be passed to create_file instead.
    md_file = create_file(question, answer, "md")

    audio_text = f"{question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format'],
    )
    return md_file, audio_file
310
 
311
def process_paper_content(paper):
    """Build the marquee caption and TTS narration text for one paper dict.

    Restored: this helper was deleted in the diff, but
    create_paper_audio_files (still invoked later in the file) calls it.
    Expects keys 'title', 'authors', 'summary'.
    """
    marquee_text = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors'][:100]} | πŸ“ {paper['summary'][:500]}"
    audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
    return marquee_text, audio_text
315
-
316
def create_paper_audio_files(papers, input_question):
    """Generate a TTS audio file for each paper, mutating the dicts in place.

    Restored: this function was deleted in the diff but is still called
    after parse_arxiv_refs() later in the file, which would raise NameError.
    Sets paper['full_audio'] and paper['marquee_text']; on any failure both
    are set to None and a warning is shown instead of aborting the batch.
    """
    for paper in papers:
        try:
            marquee_text, audio_text = process_paper_content(paper)

            audio_text = clean_for_speech(audio_text)
            file_format = st.session_state['audio_format']
            audio_file = speak_with_edge_tts(audio_text,
                                             voice=st.session_state['tts_voice'],
                                             file_format=file_format)
            paper['full_audio'] = audio_file

            st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
            play_and_download_audio(audio_file, file_type=file_format)
            paper['marquee_text'] = marquee_text

        except Exception as e:
            st.warning(f"Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['marquee_text'] = None
336
-
337
def display_papers(papers, marquee_settings):
    """Render up to 20 papers: marquee banner, metadata, summary, and audio.

    Restored: this function was deleted in the diff but is still called
    after parse_arxiv_refs() later in the file, which would raise NameError.
    """
    st.write("## Research Papers")

    papercount = 0
    for paper in papers:
        papercount += 1
        if papercount <= 20:
            if paper.get('marquee_text'):
                display_marquee(paper['marquee_text'],
                                marquee_settings,
                                key_suffix=f"paper_{papercount}")

            with st.expander(f"{papercount}. πŸ“„ {paper['title']}", expanded=True):
                st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
                st.markdown(f"*{paper['authors']}*")
                st.markdown(paper['summary'])

                if paper.get('full_audio'):
                    st.write("πŸ“š Paper Audio")
                    file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
                    if file_ext in ['mp3', 'wav']:
                        st.audio(paper['full_audio'])
359
 
360
  def parse_arxiv_refs(ref_text: str):
 
 
 
 
361
  if not ref_text:
362
  return []
363
 
@@ -367,11 +327,11 @@ def parse_arxiv_refs(ref_text: str):
367
 
368
  for i, line in enumerate(lines):
369
  if line.count('|') == 2:
 
370
  if current_paper:
371
  results.append(current_paper)
372
  if len(results) >= 20:
373
  break
374
-
375
  try:
376
  header_parts = line.strip('* ').split('|')
377
  date = header_parts[0].strip()
@@ -385,7 +345,8 @@ def parse_arxiv_refs(ref_text: str):
385
  'url': url,
386
  'authors': '',
387
  'summary': '',
388
- 'content_start': i + 1
 
389
  }
390
  except Exception as e:
391
  st.warning(f"Error parsing paper header: {str(e)}")
@@ -393,6 +354,7 @@ def parse_arxiv_refs(ref_text: str):
393
  continue
394
 
395
  elif current_paper:
 
396
  if not current_paper['authors']:
397
  current_paper['authors'] = line.strip('* ')
398
  else:
@@ -406,128 +368,190 @@ def parse_arxiv_refs(ref_text: str):
406
 
407
  return results[:20]
408
 
 
 
 
 
 
 
409
 
410
- # ---------------------------- Edit 1/11/2025 - add a constitution to my arxiv system templating to build configurable character and personality of IO.
411
-
412
- def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
413
- titles_summary=True, full_audio=False):
414
- start = time.time()
415
-
416
- ai_constitution = """
417
- You are a talented AI coder and songwriter with a unique ability to explain scientific concepts through music with code easter eggs.. Your task is to create a song that not only entertains but also educates listeners about a specific science problem and its potential solutions.
418
-
419
- 1. First, carefully read and analyze the problem provided:
420
- <science_problem>
421
- {{q}}
422
- </science_problem>
423
-
424
- 2. Next, consider the style requested:
425
- <song_style>
426
- {{SONG_STYLE}}
427
- </song_style>
428
-
429
- 3. Follow these steps to create your output:
430
-
431
- 1. Analyze the problem:
432
- - Identify the key issues and challenges
433
- - Note any potential solutions or technologies mentioned, especially in AI
434
- - Consider how these concepts can be simplified for a general audience
435
-
436
- 2. Plan your structure. Document and enumerate in markdown outlines with emojis.:
437
- - Decide on a format that fits the style
438
- - Plan to introduce the problem
439
- - Highlight key points or solutions
440
 
441
- 3. Write.:
442
- - Begin with an attention-grabbing opening line
443
- - Use metaphors and analogies to explain complex concepts
444
- - Ensure the flow naturally fits the rhythm of the chosen style
445
- - Include scientific terminology, but explain it in simple terms within
 
 
 
 
446
 
447
- 4. Incorporate scientific explanations.:
448
- - Weave factual information throughout the verses
449
- - Use the chorus to reinforce main ideas or solutions
450
- - Ensure that the scientific content is accurate and up-to-date
451
 
452
- 5. Match the requested style.:
453
- - Adapt your word choice and phrasing to fit the genre
454
- - Consider the typical rhythm and structure of this style
455
- - If applicable, include style-specific elements
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
 
457
- 6. Review and refine, add useful paper titles, keywords, descriptions of topics and concepts.:
458
- - Check that effectively communicates the problem and solutions
459
- - Ensure catchy and memorable
460
- - Verify maintains the requested style throughout
461
- """
 
 
 
 
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
 
 
 
 
464
 
 
 
 
465
 
466
-
467
- # Claude then Arxiv..
 
 
 
 
 
468
 
469
- # Claude:
470
  client = anthropic.Anthropic(api_key=anthropic_key)
471
  user_input = q
472
-
473
  response = client.messages.create(
474
  model="claude-3-sonnet-20240229",
475
  max_tokens=1000,
476
  messages=[
477
  {"role": "user", "content": user_input}
478
  ])
479
-
480
  st.write("Claude's reply 🧠:")
481
  st.markdown(response.content[0].text)
482
 
483
- # Render audio track for Claude Response
484
- #filename = generate_filename(q, response.content[0].text)
485
  result = response.content[0].text
486
- create_file(q, result)
487
- # Save and produce audio for Claude response
488
  md_file, audio_file = save_qa_with_audio(q, result)
489
  st.subheader("πŸ“ Main Response Audio")
490
  play_and_download_audio(audio_file, st.session_state['audio_format'])
491
 
492
-
493
-
494
-
495
-
496
- # Arxiv:
497
- st.write("Arxiv's AI this Evening is Mixtral 8x7B MoE Instruct with 9 English Voices 🧠:")
498
-
499
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
500
- refs = client.predict(q, 20, "Semantic Search",
501
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
502
- api_name="/update_with_rag_md")[0]
503
-
504
- r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
505
- True, api_name="/ask_llm")
 
 
 
 
 
 
 
 
506
 
507
  result = f"### πŸ”Ž {q}\n\n{r2}\n\n{refs}"
508
-
509
- # Save and produce audio
510
  md_file, audio_file = save_qa_with_audio(q, result)
511
-
512
  st.subheader("πŸ“ Main Response Audio")
513
  play_and_download_audio(audio_file, st.session_state['audio_format'])
514
 
 
515
  papers = parse_arxiv_refs(refs)
516
  if papers:
 
 
 
 
 
 
517
  create_paper_audio_files(papers, input_question=q)
518
  display_papers(papers, get_marquee_settings())
 
519
  else:
520
  st.warning("No papers found in the response.")
521
 
522
- elapsed = time.time()-start
523
  st.write(f"**Total Elapsed:** {elapsed:.2f} s")
524
-
525
  return result
526
 
527
  def process_voice_input(text):
 
528
  if not text:
529
  return
530
-
531
  st.subheader("πŸ” Search Results")
532
  result = perform_ai_lookup(
533
  text,
@@ -536,165 +560,83 @@ def process_voice_input(text):
536
  titles_summary=True,
537
  full_audio=True
538
  )
539
-
540
  md_file, audio_file = save_qa_with_audio(text, result)
541
-
542
  st.subheader("πŸ“ Generated Files")
543
  st.write(f"Markdown: {md_file}")
544
  st.write(f"Audio: {audio_file}")
545
  play_and_download_audio(audio_file, st.session_state['audio_format'])
546
 
547
- def load_files_for_sidebar():
 
 
 
 
 
 
 
 
 
 
 
 
548
  md_files = glob.glob("*.md")
549
  mp3_files = glob.glob("*.mp3")
550
  wav_files = glob.glob("*.wav")
551
-
552
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
553
  all_files = md_files + mp3_files + wav_files
554
 
555
- groups = defaultdict(list)
556
- prefix_length = len("MM_dd_yy_hh_mm_AP")
557
-
558
- for f in all_files:
559
- basename = os.path.basename(f)
560
- if len(basename) >= prefix_length and '_' in basename:
561
- group_name = basename[:prefix_length]
562
- groups[group_name].append(f)
563
- else:
564
- groups['Other'].append(f)
565
-
566
- sorted_groups = sorted(groups.items(),
567
- key=lambda x: x[0] if x[0] != 'Other' else '',
568
- reverse=True)
569
- return sorted_groups
570
-
571
- def display_file_manager_sidebar(groups_sorted):
572
- st.sidebar.title("🎡 Audio & Docs Manager")
573
-
574
- all_md = []
575
- all_mp3 = []
576
- all_wav = []
577
- for _, files in groups_sorted:
578
- for f in files:
579
- if f.endswith(".md"):
580
- all_md.append(f)
581
- elif f.endswith(".mp3"):
582
- all_mp3.append(f)
583
- elif f.endswith(".wav"):
584
- all_wav.append(f)
585
-
586
- col1, col4 = st.sidebar.columns(2)
587
- with col1:
588
- if st.button("πŸ—‘ Delete All"):
589
- for f in all_md:
590
- os.remove(f)
591
- for f in all_mp3:
592
- os.remove(f)
593
- for f in all_wav:
594
- os.remove(f)
595
- st.session_state.should_rerun = True
596
- with col4:
597
- if st.button("⬇️ Zip All"):
598
- zip_name = create_zip_of_files(all_md, all_mp3, all_wav, st.session_state.get('last_query', ''))
599
- if zip_name:
600
- st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
601
-
602
- for group_name, files in groups_sorted:
603
- if group_name == 'Other':
604
- group_label = 'Other Files'
605
- else:
606
- try:
607
- timestamp_dt = datetime.strptime(group_name, "%m_%d_%y_%I_%M_%p")
608
- group_label = timestamp_dt.strftime("%b %d, %Y %I:%M %p")
609
- except ValueError:
610
- group_label = group_name
611
-
612
- with st.sidebar.expander(f"πŸ“ {group_label} ({len(files)})", expanded=True):
613
- c1, c2 = st.columns(2)
614
- with c1:
615
- if st.button("πŸ‘€ View", key=f"view_group_{group_name}"):
616
- st.session_state.viewing_prefix = group_name
617
- with c2:
618
- if st.button("πŸ—‘ Del", key=f"del_group_{group_name}"):
619
- for f in files:
620
- os.remove(f)
621
- st.success(f"Deleted group {group_label}!")
622
- st.session_state.should_rerun = True
623
-
624
- for f in files:
625
- fname = os.path.basename(f)
626
- ext = os.path.splitext(fname)[1].lower()
627
- emoji = FILE_EMOJIS.get(ext.strip('.'), '')
628
- mtime = os.path.getmtime(f)
629
- ctime = datetime.fromtimestamp(mtime).strftime("%I:%M:%S %p")
630
- st.write(f"{emoji} **{fname}** - {ctime}")
631
-
632
- def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
633
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
634
- all_files = md_files + mp3_files + wav_files
635
  if not all_files:
636
- return None
 
 
 
 
637
 
638
- all_content = []
639
  for f in all_files:
640
- if f.endswith('.md'):
641
- with open(f, 'r', encoding='utf-8') as file:
642
- all_content.append(file.read())
643
- elif f.endswith('.mp3') or f.endswith('.wav'):
644
- basename = os.path.splitext(os.path.basename(f))[0]
645
- words = basename.replace('_', ' ')
646
- all_content.append(words)
647
-
648
- all_content.append(input_question)
649
- combined_content = " ".join(all_content)
650
- info_terms = get_high_info_terms(combined_content, top_n=10)
651
-
652
- timestamp = format_timestamp_prefix()
653
- name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:10])
654
- zip_name = f"{timestamp}_{name_text}.zip"
655
-
656
- with zipfile.ZipFile(zip_name, 'w') as z:
657
- for f in all_files:
658
- z.write(f)
659
-
660
- return zip_name
 
 
661
 
662
  def main():
663
- # Update marquee settings UI first
664
  update_marquee_settings_ui()
665
  marquee_settings = get_marquee_settings()
666
-
667
- # Initial welcome marquee
668
  display_marquee(st.session_state['marquee_content'],
669
- {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
670
- key_suffix="welcome")
671
 
672
- # Load files for sidebar
673
- groups_sorted = load_files_for_sidebar()
674
-
675
- # Update marquee content when viewing files
676
- if st.session_state.viewing_prefix:
677
- for group_name, files in groups_sorted:
678
- if group_name == st.session_state.viewing_prefix:
679
- for f in files:
680
- if f.endswith('.md'):
681
- with open(f, 'r', encoding='utf-8') as file:
682
- st.session_state['marquee_content'] = file.read()[:280]
683
-
684
- # Instead of putting voice settings in the sidebar,
685
- # we will handle them in the "🎀 Voice" tab below.
686
-
687
- # Main Interface
688
  tab_main = st.radio("Action:", ["🎀 Voice", "πŸ“Έ Media", "πŸ” ArXiv", "πŸ“ Editor"],
689
- horizontal=True)
690
 
 
691
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
692
  val = mycomponent(my_input_value="Hello")
693
 
694
  if val:
695
  val_stripped = val.replace('\\n', ' ')
696
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
697
-
698
  run_option = st.selectbox("Model:", ["Arxiv"])
699
  col1, col2 = st.columns(2)
700
  with col1:
@@ -707,20 +649,28 @@ def main():
707
  if autorun and input_changed:
708
  st.session_state.old_val = val
709
  st.session_state.last_query = edited_input
710
- result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
711
- titles_summary=True, full_audio=full_audio)
 
 
 
712
  else:
713
  if st.button("β–Ά Run"):
714
  st.session_state.old_val = val
715
  st.session_state.last_query = edited_input
716
- result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
717
- titles_summary=True, full_audio=full_audio)
 
 
 
718
 
719
- # --- Tab: ArXiv
 
 
720
  if tab_main == "πŸ” ArXiv":
721
  st.subheader("πŸ” Query ArXiv")
722
  q = st.text_input("πŸ” Query:", key="arxiv_query")
723
-
724
  st.markdown("### πŸŽ› Options")
725
  vocal_summary = st.checkbox("πŸŽ™ShortAudio", value=True, key="option_vocal_summary")
726
  extended_refs = st.checkbox("πŸ“œLongRefs", value=False, key="option_extended_refs")
@@ -735,11 +685,12 @@ def main():
735
  if full_transcript:
736
  create_file(q, result, "md")
737
 
738
- # --- Tab: Voice
 
 
739
  elif tab_main == "🎀 Voice":
740
  st.subheader("🎀 Voice Input")
741
 
742
- # Move voice selection here:
743
  st.markdown("### 🎀 Voice Settings")
744
  selected_voice = st.selectbox(
745
  "Select TTS Voice:",
@@ -747,7 +698,6 @@ def main():
747
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
748
  )
749
 
750
- # Audio Format Settings below the voice selection
751
  st.markdown("### πŸ”Š Audio Format")
752
  selected_format = st.radio(
753
  "Choose Audio Format:",
@@ -755,6 +705,7 @@ def main():
755
  index=0
756
  )
757
 
 
758
  if selected_voice != st.session_state['tts_voice']:
759
  st.session_state['tts_voice'] = selected_voice
760
  st.rerun()
@@ -762,7 +713,7 @@ def main():
762
  st.session_state['audio_format'] = selected_format.lower()
763
  st.rerun()
764
 
765
- # Now the text area to enter your message
766
  user_text = st.text_area("πŸ’¬ Message:", height=100)
767
  user_text = user_text.strip().replace('\n', ' ')
768
 
@@ -774,95 +725,65 @@ def main():
774
  st.write("**You:**", c["user"])
775
  st.write("**Response:**", c["claude"])
776
 
777
- # --- Tab: Media
 
 
778
  elif tab_main == "πŸ“Έ Media":
779
- st.header("πŸ“Έ Images & πŸŽ₯ Videos")
780
- tabs = st.tabs(["πŸ–Ό Images", "πŸŽ₯ Video"])
 
 
 
 
781
  with tabs[0]:
782
- imgs = glob.glob("*.png") + glob.glob("*.jpg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  if imgs:
784
- c = st.slider("Cols", 1, 5, 3)
785
  cols = st.columns(c)
786
  for i, f in enumerate(imgs):
787
  with cols[i % c]:
788
  st.image(Image.open(f), use_container_width=True)
789
- if st.button(f"πŸ‘€ Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
790
- response = openai_client.chat.completions.create(
791
- model=st.session_state["openai_model"],
792
- messages=[
793
- {"role": "system", "content": "Analyze the image content."},
794
- {"role": "user", "content": [
795
- {"type": "image_url",
796
- "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(open(f, 'rb').read()).decode()}"}}
797
- ]}
798
- ]
799
- )
800
- st.markdown(response.choices[0].message.content)
801
  else:
802
  st.write("No images found.")
803
 
804
- with tabs[1]:
805
- vids = glob.glob("*.mp4")
 
 
806
  if vids:
807
  for v in vids:
808
- with st.expander(f"πŸŽ₯ {os.path.basename(v)}"):
809
  st.video(v)
810
- if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
811
- frames = process_video(v)
812
- response = openai_client.chat.completions.create(
813
- model=st.session_state["openai_model"],
814
- messages=[
815
- {"role": "system", "content": "Analyze video frames."},
816
- {"role": "user", "content": [
817
- {"type": "image_url",
818
- "image_url": {"url": f"data:image/jpeg;base64,{frame}"}}
819
- for frame in frames
820
- ]}
821
- ]
822
- )
823
- st.markdown(response.choices[0].message.content)
824
  else:
825
  st.write("No videos found.")
826
 
827
- # --- Tab: Editor
 
 
828
  elif tab_main == "πŸ“ Editor":
829
- if st.session_state.editing_file:
830
- st.subheader(f"Editing: {st.session_state.editing_file}")
831
- new_text = st.text_area("✏️ Content:", st.session_state.edit_new_content, height=300)
832
- if st.button("πŸ’Ύ Save"):
833
- with open(st.session_state.editing_file, 'w', encoding='utf-8') as f:
834
- f.write(new_text)
835
- st.success("File updated successfully!")
836
- st.session_state.should_rerun = True
837
- st.session_state.editing_file = None
838
- else:
839
- st.write("Select a file from the sidebar to edit.")
840
-
841
- # Display file manager in sidebar
842
- display_file_manager_sidebar(groups_sorted)
843
-
844
- # Display viewed group content
845
- if st.session_state.viewing_prefix and any(st.session_state.viewing_prefix == group for group, _ in groups_sorted):
846
- st.write("---")
847
- st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
848
- for group_name, files in groups_sorted:
849
- if group_name == st.session_state.viewing_prefix:
850
- for f in files:
851
- fname = os.path.basename(f)
852
- ext = os.path.splitext(fname)[1].lower().strip('.')
853
- st.write(f"### {fname}")
854
- if ext == "md":
855
- content = open(f, 'r', encoding='utf-8').read()
856
- st.markdown(content)
857
- elif ext in ["mp3", "wav"]:
858
- st.audio(f)
859
- else:
860
- st.markdown(get_download_link(f), unsafe_allow_html=True)
861
- break
862
- if st.button("❌ Close"):
863
- st.session_state.viewing_prefix = None
864
- st.session_state['marquee_content'] = "πŸš€ Welcome to Deep Research Evaluator | πŸ€– Your Talking Research Assistant"
865
 
 
866
  st.markdown("""
867
  <style>
868
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
@@ -871,6 +792,7 @@ def main():
871
  </style>
872
  """, unsafe_allow_html=True)
873
 
 
874
  if st.session_state.should_rerun:
875
  st.session_state.should_rerun = False
876
  st.rerun()
 
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
23
 
24
+ # ─────────────────────────────────────────────────────────
25
+ # 1. CORE CONFIGURATION & SETUP
26
+ # ─────────────────────────────────────────────────────────
27
  st.set_page_config(
28
  page_title="🚲TalkingAIResearcherπŸ†",
29
  page_icon="πŸš²πŸ†",
 
37
  )
38
  load_dotenv()
39
 
40
+ # Available English voices for Edge TTS
41
  EDGE_TTS_VOICES = [
42
  "en-US-AriaNeural",
43
  "en-US-GuyNeural",
 
50
  "en-CA-LiamNeural"
51
  ]
52
 
53
+ # Session state variables
54
  if 'marquee_settings' not in st.session_state:
 
55
  st.session_state['marquee_settings'] = {
56
  "background": "#1E1E1E",
57
  "color": "#FFFFFF",
58
  "font-size": "14px",
59
+ "animationDuration": "20s",
60
  "width": "100%",
61
  "lineHeight": "35px"
62
  }
63
 
64
  if 'tts_voice' not in st.session_state:
65
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
66
+
67
  if 'audio_format' not in st.session_state:
68
  st.session_state['audio_format'] = 'mp3'
69
+
70
  if 'transcript_history' not in st.session_state:
71
  st.session_state['transcript_history'] = []
72
+
73
  if 'chat_history' not in st.session_state:
74
  st.session_state['chat_history'] = []
75
+
76
  if 'openai_model' not in st.session_state:
77
  st.session_state['openai_model'] = "gpt-4o-2024-05-13"
78
+
79
  if 'messages' not in st.session_state:
80
  st.session_state['messages'] = []
81
+
82
  if 'last_voice_input' not in st.session_state:
83
  st.session_state['last_voice_input'] = ""
84
+
85
  if 'editing_file' not in st.session_state:
86
  st.session_state['editing_file'] = None
87
+
88
  if 'edit_new_name' not in st.session_state:
89
  st.session_state['edit_new_name'] = ""
90
+
91
  if 'edit_new_content' not in st.session_state:
92
  st.session_state['edit_new_content'] = ""
93
+
94
  if 'viewing_prefix' not in st.session_state:
95
  st.session_state['viewing_prefix'] = None
96
+
97
  if 'should_rerun' not in st.session_state:
98
  st.session_state['should_rerun'] = False
99
+
100
  if 'old_val' not in st.session_state:
101
  st.session_state['old_val'] = None
102
+
103
  if 'last_query' not in st.session_state:
104
  st.session_state['last_query'] = ""
105
+
106
  if 'marquee_content' not in st.session_state:
107
  st.session_state['marquee_content'] = "πŸš€ Welcome to TalkingAIResearcher | πŸ€– Your Research Assistant"
108
 
109
+ # API Keys
110
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
111
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
112
  xai_key = os.getenv('xai',"")
 
120
  HF_KEY = os.getenv('HF_KEY')
121
  API_URL = os.getenv('API_URL')
122
 
123
+ # Helper constants
124
  FILE_EMOJIS = {
125
  "md": "πŸ“",
126
  "mp3": "🎡",
127
  "wav": "πŸ”Š"
128
  }
129
 
130
+ # ─────────────────────────────────────────────────────────
131
+ # 2. HELPER FUNCTIONS
132
+ # ─────────────────────────────────────────────────────────
133
+
134
def get_central_time():
    """Return the current timezone-aware datetime in US Central time.

    Uses the standard-library ``zoneinfo`` module (Python 3.9+) instead of
    the third-party ``pytz``; ``datetime.now(tz)`` with a ``ZoneInfo`` yields
    a correctly localized aware datetime with no ``localize()`` pitfalls.
    """
    from zoneinfo import ZoneInfo  # stdlib replacement for pytz
    return datetime.now(ZoneInfo('US/Central'))
138
 
139
def format_timestamp_prefix():
    """Return a filename timestamp prefix, MM_dd_yy_hh_mm_AM/PM (US Central)."""
    now_central = get_central_time()
    return now_central.strftime("%m_%d_%y_%I_%M_%p")
143
 
144
def initialize_marquee_settings():
    """Seed st.session_state['marquee_settings'] with defaults if absent."""
    default_style = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    }
    # setdefault only assigns when the key is missing, matching the
    # original membership-test-then-assign behavior.
    st.session_state.setdefault('marquee_settings', default_style)
154
 
155
def get_marquee_settings():
    """Return the marquee style dict, creating the defaults first if needed."""
    initialize_marquee_settings()
    return st.session_state.get('marquee_settings')
158
 
159
  def update_marquee_settings_ui():
160
+ """Add color pickers & sliders for marquee config in sidebar."""
 
161
  st.sidebar.markdown("### 🎯 Marquee Settings")
162
  cols = st.sidebar.columns(2)
163
  with cols[0]:
 
169
  key="text_color_picker")
170
  with cols[1]:
171
  font_size = st.slider("πŸ“ Size", 10, 24, 14, key="font_size_slider")
 
172
  duration = st.slider("⏱️ Speed", 1, 20, 20, key="duration_slider")
173
 
174
  st.session_state['marquee_settings'].update({
 
179
  })
180
 
181
  def display_marquee(text, settings, key_suffix=""):
182
+ """Show marquee text with style from settings."""
183
  truncated_text = text[:280] + "..." if len(text) > 280 else text
184
  streamlit_marquee(
185
  content=truncated_text,
 
189
  st.write("")
190
 
191
  def get_high_info_terms(text: str, top_n=10) -> list:
192
+ """Extract top_n freq words or bigrams (excluding stopwords)."""
193
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
194
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
195
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
 
199
  return [term for term, freq in counter.most_common(top_n)]
200
 
201
def clean_text_for_filename(text: str) -> str:
    """Sanitize *text* into a filename-safe, underscore-joined token string.

    Lowercases, strips characters other than word chars, whitespace and
    hyphens, drops words of 3 characters or fewer and a small stop list,
    then joins the survivors with underscores (capped at 200 chars).
    """
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    # Short or low-information words add noise to generated filenames.
    stop_short = {'the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'}
    filtered = [w for w in text.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(filtered)[:200]
210
 
 
 
211
def generate_filename(prompt, response, file_type="md", max_length=200):
    """Build a short, informative filename from a prompt/response pair.

    Combines a timestamp prefix, high-information terms, and a cleaned
    snippet of the two texts; deduplicates the parts preserving order and
    truncates so the final name stays within max_length characters.
    """
    prefix = format_timestamp_prefix() + "_"
    combined_text = (prompt + " " + response)[:200]
    info_terms = get_high_info_terms(combined_text, top_n=5)
    snippet = (prompt[:40] + " " + response[:40]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))

    full_name = '_'.join(unique_parts).strip('_')
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(full_name) > leftover_chars:
        full_name = full_name[:leftover_chars]

    return f"{prefix}{full_name}.{file_type}"
240
 
 
241
def create_file(prompt, response, file_type="md"):
    """Write prompt and response to a freshly named file; return its name."""
    out_name = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(out_name, 'w', encoding='utf-8') as fh:
        fh.write(f"{prompt}\n\n{response}")
    return out_name
247
 
248
  def get_download_link(file, file_type="zip"):
249
+ """
250
+ Convert a file to base64 and return an HTML link for download.
251
+ """
252
  with open(file, "rb") as f:
253
  b64 = base64.b64encode(f.read()).decode()
254
  if file_type == "zip":
 
263
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
264
 
265
  def clean_for_speech(text: str) -> str:
266
+ """Clean up text for TTS output."""
267
  text = text.replace("\n", " ")
268
  text = text.replace("</s>", " ")
269
  text = text.replace("#", "")
 
272
  return text
273
 
274
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
275
+ """Async TTS generation with edge-tts library."""
276
  text = clean_for_speech(text)
277
  if not text.strip():
278
  return None
 
284
  return out_fn
285
 
286
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper around the async edge-tts generation coroutine."""
    coro = edge_tts_generate_audio(text, voice, rate, pitch, file_format)
    return asyncio.run(coro)
289
 
290
def play_and_download_audio(file_path, file_type="mp3"):
    """Render an audio player plus a download link for an existing file."""
    # Guard clause: silently do nothing when the path is missing/invalid,
    # matching the original positive-condition behavior.
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(get_download_link(file_path, file_type=file_type),
                unsafe_allow_html=True)
296
 
297
def save_qa_with_audio(question, answer, voice=None):
    """Persist a Q&A pair as markdown and synthesize a matching audio file.

    Falls back to the session's configured TTS voice when *voice* is not
    given. Returns a ``(md_file, audio_file)`` tuple of file paths.

    Fix: the original built a ``combined_text`` markdown string that was
    never used; it has been removed.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    md_file = create_file(question, answer, "md")

    audio_text = f"{question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file
311
 
312
+ # ─────────────────────────────────────────────────────────
313
+ # 3. PAPER PARSING & DISPLAY
314
+ # ─────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  def parse_arxiv_refs(ref_text: str):
317
+ """
318
+ Given a multi-line markdown with arxiv references, parse them into
319
+ a list of dicts: {date, title, url, authors, summary, ...}.
320
+ """
321
  if not ref_text:
322
  return []
323
 
 
327
 
328
  for i, line in enumerate(lines):
329
  if line.count('|') == 2:
330
+ # Found a new paper line
331
  if current_paper:
332
  results.append(current_paper)
333
  if len(results) >= 20:
334
  break
 
335
  try:
336
  header_parts = line.strip('* ').split('|')
337
  date = header_parts[0].strip()
 
345
  'url': url,
346
  'authors': '',
347
  'summary': '',
348
+ 'full_audio': None,
349
+ 'download_base64': '',
350
  }
351
  except Exception as e:
352
  st.warning(f"Error parsing paper header: {str(e)}")
 
354
  continue
355
 
356
  elif current_paper:
357
+ # If authors not set, fill it; otherwise, fill summary
358
  if not current_paper['authors']:
359
  current_paper['authors'] = line.strip('* ')
360
  else:
 
368
 
369
  return results[:20]
370
 
371
def create_paper_links_md(papers):
    """Render a minimal markdown page of numbered links to each paper."""
    link_lines = [
        f"{idx}. **{paper['title']}** β€” [Arxiv]({paper['url']})"
        for idx, paper in enumerate(papers, start=1)
    ]
    return "\n".join(["# Paper Links\n"] + link_lines)
377
 
378
def create_paper_audio_files(papers, input_question):
    """Generate a TTS summary per paper, attaching audio path and download link.

    Mutates each paper dict in place: 'full_audio' gets the generated file
    path (or None on failure) and 'download_base64' gets an HTML anchor
    embedding the audio as a data URI. ``input_question`` is accepted for
    callers but not referenced in this body.
    """
    for paper in papers:
        try:
            file_format = st.session_state['audio_format']
            speech_text = clean_for_speech(
                f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            )
            audio_path = speak_with_edge_tts(
                speech_text,
                voice=st.session_state['tts_voice'],
                file_format=file_format
            )
            paper['full_audio'] = audio_path

            if audio_path:
                with open(audio_path, "rb") as af:
                    encoded = base64.b64encode(af.read()).decode()
                base_name = os.path.basename(audio_path)
                mime_type = "mpeg" if file_format == "mp3" else "wav"
                paper['download_base64'] = (
                    f'<a href="data:audio/{mime_type};base64,{encoded}" '
                    f'download="{base_name}">🎡 Download {base_name}</a>'
                )
        except Exception as e:
            st.warning(f"Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
409
 
410
def display_papers(papers, marquee_settings):
    """Render each paper in the main pane: marquee banner, expander, audio."""
    st.write("## Research Papers")
    for idx, paper in enumerate(papers, start=1):
        banner = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors'][:120]} | πŸ“ {paper['summary'][:200]}"
        display_marquee(banner, marquee_settings, key_suffix=f"paper_{idx}")

        with st.expander(f"{idx}. πŸ“„ {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']}** β€” [Arxiv Link]({paper['url']})")
            st.markdown(f"*Authors:* {paper['authors']}")
            st.markdown(paper['summary'])
            if paper.get('full_audio'):
                st.write("πŸ“š Paper Audio")
                st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
426
+
427
def display_papers_in_sidebar(papers):
    """Mirror the paper list in the sidebar with audio players and links."""
    st.sidebar.title("🎢 Papers & Audio")
    for idx, paper in enumerate(papers, start=1):
        with st.sidebar.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Arxiv:** [Link]({paper['url']})")
            if paper['full_audio']:
                st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
            st.markdown(f"**Authors:** {paper['authors']}")
            if paper['summary']:
                # Clip long abstracts so the sidebar stays compact.
                st.markdown(f"**Summary:** {paper['summary'][:300]}...")
440
+
441
+ # ─────────────────────────────────────────────────────────
442
+ # 4. ZIP FUNCTION
443
+ # ─────────────────────────────────────────────────────────
444
 
445
def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """Zip all given files into a short-named archive.

    Skips README.md, derives the archive name from high-information terms
    in the combined content plus *input_question*, and truncates the stem
    to ~20 chars so base64 download links stay manageable.

    Returns the zip filename, or None when there is nothing to archive.

    Idiom fixes: direct ``'-'.join(info_terms[:5])`` instead of a
    pass-through generator, and a tuple argument to ``endswith``.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    # Collect the text used to derive the archive name.
    all_content = []
    for f in all_files:
        if f.endswith('.md'):
            with open(f, 'r', encoding='utf-8') as file:
                all_content.append(file.read())
        elif f.endswith(('.mp3', '.wav')):
            # Audio files contribute their underscore-split basename words.
            basename = os.path.splitext(os.path.basename(f))[0]
            all_content.append(basename.replace('_', ' '))

    all_content.append(input_question)
    combined_content = " ".join(all_content)
    info_terms = get_high_info_terms(combined_content, top_n=10)

    timestamp = format_timestamp_prefix()
    name_text = '-'.join(info_terms[:5])
    short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as z:
        for f in all_files:
            z.write(f)
    return short_zip_name
477
 
478
+ # ─────────────────────────────────────────────────────────
479
+ # 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
480
+ # ─────────────────────────────────────────────────────────
481
 
482
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False):
    """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline."""
    # NOTE(review): vocal_summary, extended_refs, titles_summary and
    # full_audio are accepted but never referenced in this body — confirm
    # whether callers still need them or they are vestigial.
    start = time.time()
    ai_constitution = """
    You are a talented AI coder and songwriter...
    """
    # NOTE(review): ai_constitution is built but never sent to either model.

    # --- 1) Claude API: ask Claude directly and render its reply.
    client = anthropic.Anthropic(api_key=anthropic_key)
    user_input = q
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[
            {"role": "user", "content": user_input}
        ])
    st.write("Claude's reply 🧠:")
    st.markdown(response.content[0].text)

    # Save & produce audio for the Claude answer.
    result = response.content[0].text
    # NOTE(review): save_qa_with_audio below also writes a markdown file,
    # so this create_file call likely produces a duplicate — confirm.
    create_file(q, result)
    md_file, audio_file = save_qa_with_audio(q, result)
    st.subheader("πŸ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 2) Arxiv RAG: query the hosted Gradio space for refs + an answer.
    st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = client.predict(
        q,
        20,
        "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]

    r2 = client.predict(
        q,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )

    # Combine question, LLM answer and references; this OVERWRITES the
    # Claude-only `result` from step 1 and is what gets returned.
    result = f"### πŸ”Ž {q}\n\n{r2}\n\n{refs}"
    md_file, audio_file = save_qa_with_audio(q, result)
    # NOTE(review): identical heading to step 1 — two "Main Response Audio"
    # sections will appear; confirm this is intentional.
    st.subheader("πŸ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 3) Parse + handle papers found in the reference markdown.
    papers = parse_arxiv_refs(refs)
    if papers:
        # Create minimal links page first
        paper_links = create_paper_links_md(papers)
        links_file = create_file(q, paper_links, "md")  # NOTE(review): links_file is unused
        st.markdown(paper_links)

        # Then create audio for each paper
        create_paper_audio_files(papers, input_question=q)
        display_papers(papers, get_marquee_settings())
        display_papers_in_sidebar(papers)
    else:
        st.warning("No papers found in the response.")

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
550
 
551
  def process_voice_input(text):
552
+ """When user sends voice query, we run the AI lookup + Q&A with audio."""
553
  if not text:
554
  return
 
555
  st.subheader("πŸ” Search Results")
556
  result = perform_ai_lookup(
557
  text,
 
560
  titles_summary=True,
561
  full_audio=True
562
  )
 
563
  md_file, audio_file = save_qa_with_audio(text, result)
 
564
  st.subheader("πŸ“ Generated Files")
565
  st.write(f"Markdown: {md_file}")
566
  st.write(f"Audio: {audio_file}")
567
  play_and_download_audio(audio_file, st.session_state['audio_format'])
568
 
569
+ # ─────────────────────────────────────────────────────────
570
+ # 6. FILE HISTORY SIDEBAR
571
+ # ─────────────────────────────────────────────────────────
572
+
573
def display_file_history_in_sidebar():
    """Sidebar listing of local .md/.mp3/.wav files, newest first.

    Each entry shows an emoji icon, modified timestamp, a text preview or
    audio player depending on type, and a download link.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### πŸ“‚ File History")

    found = glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.wav")
    if not found:
        st.sidebar.write("No files found.")
        return

    # Newest files first.
    for path in sorted(found, key=os.path.getmtime, reverse=True):
        fname = os.path.basename(path)
        ext = os.path.splitext(fname)[1].lower().strip('.')
        emoji = FILE_EMOJIS.get(ext, 'πŸ“¦')
        mtime = datetime.fromtimestamp(os.path.getmtime(path)).strftime("%Y-%m-%d %H:%M:%S")

        with st.sidebar.expander(f"{emoji} {fname}"):
            st.write(f"**Modified:** {mtime}")
            if ext == "md":
                with open(path, "r", encoding="utf-8") as fh:
                    snippet = fh.read(200).replace("\n", " ")
                # A full 200-char read implies the file continues.
                if len(snippet) == 200:
                    snippet += "..."
                st.write(snippet)
                st.markdown(get_download_link(path, file_type="md"), unsafe_allow_html=True)
            elif ext in ("mp3", "wav"):
                st.audio(path)
                st.markdown(get_download_link(path, file_type=ext), unsafe_allow_html=True)
            else:
                st.markdown(get_download_link(path), unsafe_allow_html=True)
614
+
615
+ # ─────────────────────────────────────────────────────────
616
+ # 7. MAIN APP
617
+ # ─────────────────────────────────────────────────────────
618
 
619
  def main():
620
+ # 1) Setup marquee UI in the sidebar
621
  update_marquee_settings_ui()
622
  marquee_settings = get_marquee_settings()
623
+
624
+ # 2) Display the marquee welcome
625
  display_marquee(st.session_state['marquee_content'],
626
+ {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
627
+ key_suffix="welcome")
628
 
629
+ # 3) Main action tabs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
  tab_main = st.radio("Action:", ["🎀 Voice", "πŸ“Έ Media", "πŸ” ArXiv", "πŸ“ Editor"],
631
+ horizontal=True)
632
 
633
+ # Example custom component usage
634
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
635
  val = mycomponent(my_input_value="Hello")
636
 
637
  if val:
638
  val_stripped = val.replace('\\n', ' ')
639
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
 
640
  run_option = st.selectbox("Model:", ["Arxiv"])
641
  col1, col2 = st.columns(2)
642
  with col1:
 
649
  if autorun and input_changed:
650
  st.session_state.old_val = val
651
  st.session_state.last_query = edited_input
652
+ perform_ai_lookup(edited_input,
653
+ vocal_summary=True,
654
+ extended_refs=False,
655
+ titles_summary=True,
656
+ full_audio=full_audio)
657
  else:
658
  if st.button("β–Ά Run"):
659
  st.session_state.old_val = val
660
  st.session_state.last_query = edited_input
661
+ perform_ai_lookup(edited_input,
662
+ vocal_summary=True,
663
+ extended_refs=False,
664
+ titles_summary=True,
665
+ full_audio=full_audio)
666
 
667
+ # ─────────────────────────────────────────────────────────
668
+ # TAB: ArXiv
669
+ # ─────────────────────────────────────────────────────────
670
  if tab_main == "πŸ” ArXiv":
671
  st.subheader("πŸ” Query ArXiv")
672
  q = st.text_input("πŸ” Query:", key="arxiv_query")
673
+
674
  st.markdown("### πŸŽ› Options")
675
  vocal_summary = st.checkbox("πŸŽ™ShortAudio", value=True, key="option_vocal_summary")
676
  extended_refs = st.checkbox("πŸ“œLongRefs", value=False, key="option_extended_refs")
 
685
  if full_transcript:
686
  create_file(q, result, "md")
687
 
688
+ # ─────────────────────────────────────────────────────────
689
+ # TAB: Voice
690
+ # ─────────────────────────────────────────────────────────
691
  elif tab_main == "🎀 Voice":
692
  st.subheader("🎀 Voice Input")
693
 
 
694
  st.markdown("### 🎀 Voice Settings")
695
  selected_voice = st.selectbox(
696
  "Select TTS Voice:",
 
698
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
699
  )
700
 
 
701
  st.markdown("### πŸ”Š Audio Format")
702
  selected_format = st.radio(
703
  "Choose Audio Format:",
 
705
  index=0
706
  )
707
 
708
+ # Update session state if voice/format changes
709
  if selected_voice != st.session_state['tts_voice']:
710
  st.session_state['tts_voice'] = selected_voice
711
  st.rerun()
 
713
  st.session_state['audio_format'] = selected_format.lower()
714
  st.rerun()
715
 
716
+ # Input text
717
  user_text = st.text_area("πŸ’¬ Message:", height=100)
718
  user_text = user_text.strip().replace('\n', ' ')
719
 
 
725
  st.write("**You:**", c["user"])
726
  st.write("**Response:**", c["claude"])
727
 
728
+ # ─────────────────────────────────────────────────────────
729
+ # TAB: Media
730
+ # ─────────────────────────────────────────────────────────
731
  elif tab_main == "πŸ“Έ Media":
732
+ st.header("πŸ“Έ Media Gallery")
733
+
734
+ # By default, show audio first
735
+ tabs = st.tabs(["🎡 Audio", "πŸ–Ό Images", "πŸŽ₯ Video"])
736
+
737
+ # AUDIO sub-tab
738
  with tabs[0]:
739
+ st.subheader("🎡 Audio Files")
740
+ audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
741
+ if audio_files:
742
+ for a in audio_files:
743
+ with st.expander(os.path.basename(a)):
744
+ st.audio(a)
745
+ ext = os.path.splitext(a)[1].replace('.', '')
746
+ dl_link = get_download_link(a, file_type=ext)
747
+ st.markdown(dl_link, unsafe_allow_html=True)
748
+ else:
749
+ st.write("No audio files found.")
750
+
751
+ # IMAGES sub-tab
752
+ with tabs[1]:
753
+ st.subheader("πŸ–Ό Image Files")
754
+ imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
755
  if imgs:
756
+ c = st.slider("Cols", 1, 5, 3, key="cols_images")
757
  cols = st.columns(c)
758
  for i, f in enumerate(imgs):
759
  with cols[i % c]:
760
  st.image(Image.open(f), use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
761
  else:
762
  st.write("No images found.")
763
 
764
+ # VIDEO sub-tab
765
+ with tabs[2]:
766
+ st.subheader("πŸŽ₯ Video Files")
767
+ vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
768
  if vids:
769
  for v in vids:
770
+ with st.expander(os.path.basename(v)):
771
  st.video(v)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
772
  else:
773
  st.write("No videos found.")
774
 
775
+ # ─────────────────────────────────────────────────────────
776
+ # TAB: Editor
777
+ # ─────────────────────────────────────────────────────────
778
  elif tab_main == "πŸ“ Editor":
779
+ st.write("Select or create a file to edit. (Currently minimal demo)")
780
+
781
+ # ─────────────────────────────────────────────────────────
782
+ # SIDEBAR: FILE HISTORY
783
+ # ─────────────────────────────────────────────────────────
784
+ display_file_history_in_sidebar()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
 
786
+ # Some light CSS styling
787
  st.markdown("""
788
  <style>
789
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
 
792
  </style>
793
  """, unsafe_allow_html=True)
794
 
795
+ # Rerun if needed
796
  if st.session_state.should_rerun:
797
  st.session_state.should_rerun = False
798
  st.rerun()