awacke1 committed
Commit 7104798 · verified · 1 parent: 05a7287

Update app.py

Files changed (1): app.py (+62 -137)
app.py CHANGED
@@ -50,12 +50,11 @@ EDGE_TTS_VOICES = [
50
 
51
  # Initialize session state variables
52
  if 'marquee_settings' not in st.session_state:
53
- # Default to 20s animationDuration instead of 10s:
54
  st.session_state['marquee_settings'] = {
55
  "background": "#1E1E1E",
56
  "color": "#FFFFFF",
57
  "font-size": "14px",
58
- "animationDuration": "20s", # <- changed to 20s
59
  "width": "100%",
60
  "lineHeight": "35px"
61
  }
@@ -91,7 +90,6 @@ if 'last_query' not in st.session_state:
91
  if 'marquee_content' not in st.session_state:
92
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
93
 
94
- # 🔑 2. API Setup & Clients
95
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
96
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
97
  xai_key = os.getenv('xai',"")
@@ -105,7 +103,6 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
105
  HF_KEY = os.getenv('HF_KEY')
106
  API_URL = os.getenv('API_URL')
107
 
108
- # Constants
109
  FILE_EMOJIS = {
110
  "md": "📝",
111
  "mp3": "🎵",
@@ -113,17 +110,14 @@ FILE_EMOJIS = {
113
  }
114
 
115
  def get_central_time():
116
- """Get current time in US Central timezone"""
117
  central = pytz.timezone('US/Central')
118
  return datetime.now(central)
119
 
120
  def format_timestamp_prefix():
121
- """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM"""
122
  ct = get_central_time()
123
  return ct.strftime("%m_%d_%y_%I_%M_%p")
124
 
125
  def initialize_marquee_settings():
126
- """Initialize marquee settings in session state"""
127
  if 'marquee_settings' not in st.session_state:
128
  st.session_state['marquee_settings'] = {
129
  "background": "#1E1E1E",
@@ -135,13 +129,10 @@ def initialize_marquee_settings():
135
  }
136
 
137
  def get_marquee_settings():
138
- """Get or update marquee settings from session state"""
139
  initialize_marquee_settings()
140
  return st.session_state['marquee_settings']
141
 
142
  def update_marquee_settings_ui():
143
- """Update marquee settings via UI controls"""
144
- initialize_marquee_settings()
145
  st.sidebar.markdown("### 🎯 Marquee Settings")
146
  cols = st.sidebar.columns(2)
147
  with cols[0]:
@@ -163,7 +154,6 @@ def update_marquee_settings_ui():
163
  })
164
 
165
  def display_marquee(text, settings, key_suffix=""):
166
- """Display marquee with given text and settings"""
167
  truncated_text = text[:280] + "..." if len(text) > 280 else text
168
  streamlit_marquee(
169
  content=truncated_text,
@@ -173,9 +163,6 @@ def display_marquee(text, settings, key_suffix=""):
173
  st.write("")
174
 
175
  def get_high_info_terms(text: str, top_n=10) -> list:
176
- """
177
- Finds the top_n frequent words or bigrams (excluding some common stopwords).
178
- """
179
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
180
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
181
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
@@ -185,9 +172,6 @@ def get_high_info_terms(text: str, top_n=10) -> list:
185
  return [term for term, freq in counter.most_common(top_n)]
186
 
187
  def clean_text_for_filename(text: str) -> str:
188
- """
189
- Cleans a text so it can be used in a filename.
190
- """
191
  text = text.lower()
192
  text = re.sub(r'[^\w\s-]', '', text)
193
  words = text.split()
@@ -196,23 +180,13 @@ def clean_text_for_filename(text: str) -> str:
196
  filtered = [w for w in words if len(w) > 3 and w not in stop_short]
197
  return '_'.join(filtered)[:200]
198
 
199
-
200
  def generate_filename(prompt, response, file_type="md", max_length=200):
201
- """
202
- Generate a shortened filename by:
203
- 1. Extracting high-info terms
204
- 2. Creating a smaller snippet
205
- 3. Cleaning & joining them
206
- 4. Removing duplicates
207
- 5. Truncating if needed
208
- """
209
  prefix = format_timestamp_prefix() + "_"
210
  combined_text = (prompt + " " + response)[:200]
211
  info_terms = get_high_info_terms(combined_text, top_n=5)
212
  snippet = (prompt[:40] + " " + response[:40]).strip()
213
  snippet_cleaned = clean_text_for_filename(snippet)
214
-
215
- # Combine info terms + snippet, remove duplicates
216
  name_parts = info_terms + [snippet_cleaned]
217
  seen = set()
218
  unique_parts = []
@@ -228,20 +202,13 @@ def generate_filename(prompt, response, file_type="md", max_length=200):
228
 
229
  return f"{prefix}{full_name}.{file_type}"
230
 
231
-
232
  def create_file(prompt, response, file_type="md"):
233
- """
234
- Create a file using the shortened filename from generate_filename().
235
- """
236
  filename = generate_filename(prompt.strip(), response.strip(), file_type)
237
  with open(filename, 'w', encoding='utf-8') as f:
238
  f.write(prompt + "\n\n" + response)
239
  return filename
240
 
241
  def get_download_link(file, file_type="zip"):
242
- """
243
- Returns an HTML anchor tag for downloading the specified file (base64-encoded).
244
- """
245
  with open(file, "rb") as f:
246
  b64 = base64.b64encode(f.read()).decode()
247
  if file_type == "zip":
@@ -256,9 +223,6 @@ def get_download_link(file, file_type="zip"):
256
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
257
 
258
  def clean_for_speech(text: str) -> str:
259
- """
260
- Cleans text to make TTS output more coherent.
261
- """
262
  text = text.replace("\n", " ")
263
  text = text.replace("</s>", " ")
264
  text = text.replace("#", "")
@@ -281,36 +245,25 @@ def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_fo
281
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
282
 
283
  def play_and_download_audio(file_path, file_type="mp3"):
284
- """Play audio and show a direct download link in the main area."""
285
  if file_path and os.path.exists(file_path):
286
  st.audio(file_path)
287
  dl_link = get_download_link(file_path, file_type=file_type)
288
  st.markdown(dl_link, unsafe_allow_html=True)
289
 
290
  def save_qa_with_audio(question, answer, voice=None):
291
- """Save Q&A to markdown and generate audio file."""
292
  if not voice:
293
  voice = st.session_state['tts_voice']
294
-
295
- # Create markdown file
296
  combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
297
  md_file = create_file(question, answer, "md")
298
-
299
- # Generate audio file
300
  audio_text = f"{question}\n\nAnswer: {answer}"
301
  audio_file = speak_with_edge_tts(
302
  audio_text,
303
  voice=voice,
304
  file_format=st.session_state['audio_format']
305
  )
306
-
307
  return md_file, audio_file
308
 
309
  def parse_arxiv_refs(ref_text: str):
310
- """
311
- Given a multi-line markdown with arxiv references, parse them into
312
- a structure: [{date, title, url, authors, summary}, ...]
313
- """
314
  if not ref_text:
315
  return []
316
 
@@ -320,19 +273,16 @@ def parse_arxiv_refs(ref_text: str):
320
 
321
  for i, line in enumerate(lines):
322
  if line.count('|') == 2:
323
- # We found a new paper header line
324
  if current_paper:
325
  results.append(current_paper)
326
  if len(results) >= 20:
327
  break
328
-
329
  try:
330
  header_parts = line.strip('* ').split('|')
331
  date = header_parts[0].strip()
332
  title = header_parts[1].strip()
333
  url_match = re.search(r'(https://arxiv.org/\S+)', line)
334
  url = url_match.group(1) if url_match else f"paper_{len(results)}"
335
-
336
  current_paper = {
337
  'date': date,
338
  'title': title,
@@ -346,9 +296,7 @@ def parse_arxiv_refs(ref_text: str):
346
  st.warning(f"Error parsing paper header: {str(e)}")
347
  current_paper = {}
348
  continue
349
-
350
  elif current_paper:
351
- # Fill authors if empty, else fill summary
352
  if not current_paper['authors']:
353
  current_paper['authors'] = line.strip('* ')
354
  else:
@@ -356,31 +304,20 @@ def parse_arxiv_refs(ref_text: str):
356
  current_paper['summary'] += ' ' + line.strip()
357
  else:
358
  current_paper['summary'] = line.strip()
359
-
360
  if current_paper:
361
  results.append(current_paper)
362
 
363
  return results[:20]
364
 
365
  def create_paper_links_md(papers):
366
- """
367
- Creates a minimal markdown list of paper titles + arxiv links
368
- (and if you store PDF links, you could also include them).
369
- """
370
  lines = ["# Paper Links\n"]
371
  for i, p in enumerate(papers, start=1):
372
- # Basic link
373
  lines.append(f"{i}. **{p['title']}** — [Arxiv]({p['url']})")
374
  return "\n".join(lines)
375
 
376
  def create_paper_audio_files(papers, input_question):
377
- """
378
- Generate TTS audio for each paper, store base64 link for stable download,
379
- and attach to each paper dict.
380
- """
381
  for paper in papers:
382
  try:
383
- # Just a short version for TTS
384
  audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
385
  audio_text = clean_for_speech(audio_text)
386
 
@@ -392,11 +329,9 @@ def create_paper_audio_files(papers, input_question):
392
  )
393
  paper['full_audio'] = audio_file
394
 
395
- # Store a base64 link with consistent name
396
  if audio_file:
397
  with open(audio_file, "rb") as af:
398
  b64_data = base64.b64encode(af.read()).decode()
399
- # We'll keep the original file's name as the stable download name
400
  download_filename = os.path.basename(audio_file)
401
  mime_type = "mpeg" if file_format == "mp3" else "wav"
402
  paper['download_base64'] = (
@@ -410,13 +345,8 @@ def create_paper_audio_files(papers, input_question):
410
  paper['download_base64'] = ''
411
 
412
  def display_papers(papers, marquee_settings):
413
- """
414
- Display the papers in the main area with marquee + expanders + audio.
415
- """
416
  st.write("## Research Papers")
417
-
418
  for i, paper in enumerate(papers, start=1):
419
- # Show marquee
420
  marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}"
421
  display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
422
 
@@ -424,7 +354,6 @@ def display_papers(papers, marquee_settings):
424
  st.markdown(f"**{paper['date']} | {paper['title']} |** [Arxiv Link]({paper['url']})")
425
  st.markdown(f"*Authors:* {paper['authors']}")
426
  st.markdown(paper['summary'])
427
-
428
  if paper.get('full_audio'):
429
  st.write("📚 Paper Audio")
430
  st.audio(paper['full_audio'])
@@ -432,10 +361,6 @@ def display_papers(papers, marquee_settings):
432
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
433
 
434
  def display_papers_in_sidebar(papers):
435
- """
436
- New approach: in the sidebar, mirror the paper listing
437
- with expanders for each paper, link to arxiv, st.audio, etc.
438
- """
439
  st.sidebar.title("🎶 Papers & Audio")
440
  for i, paper in enumerate(papers, start=1):
441
  with st.sidebar.expander(f"{i}. {paper['title']}"):
@@ -444,15 +369,11 @@ def display_papers_in_sidebar(papers):
444
  st.audio(paper['full_audio'])
445
  if paper['download_base64']:
446
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
447
- # Show minimal text if desired:
448
  st.markdown(f"**Authors:** {paper['authors']}")
449
  if paper['summary']:
450
  st.markdown(f"**Summary:** {paper['summary'][:300]}...")
451
 
452
  def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
453
- """
454
- Zip up all relevant files, but limit final zip name to 20 chars.
455
- """
456
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
457
  all_files = md_files + mp3_files + wav_files
458
  if not all_files:
@@ -464,7 +385,6 @@ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
464
  with open(f, 'r', encoding='utf-8') as file:
465
  all_content.append(file.read())
466
  elif f.endswith('.mp3') or f.endswith('.wav'):
467
- # Add some text representation
468
  basename = os.path.splitext(os.path.basename(f))[0]
469
  words = basename.replace('_', ' ')
470
  all_content.append(words)
@@ -474,82 +394,61 @@ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
474
  info_terms = get_high_info_terms(combined_content, top_n=10)
475
 
476
  timestamp = format_timestamp_prefix()
477
- name_text = '-'.join(term for term in info_terms[:5]) # shorter
478
- # Limit the final name to 20 chars (excluding .zip)
479
  short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"
480
-
481
  with zipfile.ZipFile(short_zip_name, 'w') as z:
482
  for f in all_files:
483
  z.write(f)
484
-
485
  return short_zip_name
486
 
487
-
488
- # ---------------------------- 1/11/2025 - add a constitution to my arxiv system templating to build configurable personality
489
-
490
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
491
  titles_summary=True, full_audio=False):
492
  start = time.time()
493
-
494
  ai_constitution = """
495
- You are a talented AI coder and songwriter with a unique ability to explain scientific concepts through music with code easter eggs.. Your task is to create a song that not only entertains but also educates listeners about a specific science problem and its potential solutions.
496
-
497
- (Omitted extra instructions for brevity...)
498
  """
499
-
500
  # Claude:
501
  client = anthropic.Anthropic(api_key=anthropic_key)
502
  user_input = q
503
-
504
  response = client.messages.create(
505
  model="claude-3-sonnet-20240229",
506
  max_tokens=1000,
507
  messages=[
508
  {"role": "user", "content": user_input}
509
  ])
510
-
511
  st.write("Claude's reply 🧠:")
512
  st.markdown(response.content[0].text)
513
 
514
- # Save and produce audio for Claude response
515
  result = response.content[0].text
516
- create_file(q, result) # MD file
517
  md_file, audio_file = save_qa_with_audio(q, result)
518
  st.subheader("📝 Main Response Audio")
519
  play_and_download_audio(audio_file, st.session_state['audio_format'])
520
 
521
-
522
  # Arxiv:
523
- st.write("Arxiv's AI this Evening is Mixtral 8x7B MoE Instruct with 9 English Voices 🧠:")
524
-
525
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
526
  refs = client.predict(q, 20, "Semantic Search",
527
  "mistralai/Mixtral-8x7B-Instruct-v0.1",
528
  api_name="/update_with_rag_md")[0]
529
-
530
  r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
531
  True, api_name="/ask_llm")
532
 
533
  result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
534
-
535
- # Save and produce audio for second response
536
  md_file, audio_file = save_qa_with_audio(q, result)
537
-
538
  st.subheader("📝 Main Response Audio")
539
  play_and_download_audio(audio_file, st.session_state['audio_format'])
540
 
541
  papers = parse_arxiv_refs(refs)
542
  if papers:
543
- # 4) Create & show a minimal markdown links page before generating audio
544
  paper_links = create_paper_links_md(papers)
545
  links_file = create_file(q, paper_links, "md")
546
  st.markdown(paper_links)
547
 
548
- # Now produce audio for each paper
549
  create_paper_audio_files(papers, input_question=q)
550
  display_papers(papers, get_marquee_settings())
551
-
552
- # Also display in the sidebar as requested
553
  display_papers_in_sidebar(papers)
554
  else:
555
  st.warning("No papers found in the response.")
@@ -561,7 +460,6 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
561
  def process_voice_input(text):
562
  if not text:
563
  return
564
-
565
  st.subheader("🔍 Search Results")
566
  result = perform_ai_lookup(
567
  text,
@@ -570,38 +468,77 @@ def process_voice_input(text):
570
  titles_summary=True,
571
  full_audio=True
572
  )
573
-
574
- # Save final Q&A with audio
575
  md_file, audio_file = save_qa_with_audio(text, result)
576
-
577
  st.subheader("📝 Generated Files")
578
  st.write(f"Markdown: {md_file}")
579
  st.write(f"Audio: {audio_file}")
580
  play_and_download_audio(audio_file, st.session_state['audio_format'])
581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  def main():
583
- # Update marquee settings UI
584
  update_marquee_settings_ui()
585
  marquee_settings = get_marquee_settings()
586
-
587
- # Initial welcome marquee
588
  display_marquee(st.session_state['marquee_content'],
589
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
590
  key_suffix="welcome")
591
 
592
- # Main action tabs
593
  tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
594
  horizontal=True)
595
 
596
- # Simple example usage of a Streamlit component (placeholder)
597
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
598
  val = mycomponent(my_input_value="Hello")
599
 
600
- # Quick example - if the component returns text:
601
  if val:
602
  val_stripped = val.replace('\\n', ' ')
603
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
604
-
605
  run_option = st.selectbox("Model:", ["Arxiv"])
606
  col1, col2 = st.columns(2)
607
  with col1:
@@ -629,11 +566,9 @@ def main():
629
  titles_summary=True,
630
  full_audio=full_audio)
631
 
632
- # --- Tab: ArXiv
633
  if tab_main == "🔍 ArXiv":
634
  st.subheader("🔍 Query ArXiv")
635
  q = st.text_input("🔍 Query:", key="arxiv_query")
636
-
637
  st.markdown("### 🎛 Options")
638
  vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
639
  extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
@@ -648,18 +583,14 @@ def main():
648
  if full_transcript:
649
  create_file(q, result, "md")
650
 
651
- # --- Tab: Voice
652
  elif tab_main == "🎤 Voice":
653
  st.subheader("🎤 Voice Input")
654
-
655
- # Voice and format settings
656
  st.markdown("### 🎤 Voice Settings")
657
  selected_voice = st.selectbox(
658
  "Select TTS Voice:",
659
  options=EDGE_TTS_VOICES,
660
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
661
  )
662
-
663
  st.markdown("### 🔊 Audio Format")
664
  selected_format = st.radio(
665
  "Choose Audio Format:",
@@ -674,7 +605,6 @@ def main():
674
  st.session_state['audio_format'] = selected_format.lower()
675
  st.rerun()
676
 
677
- # User text
678
  user_text = st.text_area("💬 Message:", height=100)
679
  user_text = user_text.strip().replace('\n', ' ')
680
 
@@ -686,11 +616,9 @@ def main():
686
  st.write("**You:**", c["user"])
687
  st.write("**Response:**", c["claude"])
688
 
689
- # --- Tab: Media
690
  elif tab_main == "📸 Media":
691
  st.header("📸 Media Gallery")
692
- tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"]) # audio first = default
693
- # --- Audio Tab
694
  with tabs[0]:
695
  st.subheader("🎵 Audio Files")
696
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
@@ -703,8 +631,6 @@ def main():
703
  st.markdown(dl_link, unsafe_allow_html=True)
704
  else:
705
  st.write("No audio files found.")
706
-
707
- # --- Images Tab
708
  with tabs[1]:
709
  st.subheader("🖼 Image Files")
710
  imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
@@ -716,8 +642,6 @@ def main():
716
  st.image(Image.open(f), use_container_width=True)
717
  else:
718
  st.write("No images found.")
719
-
720
- # --- Video Tab
721
  with tabs[2]:
722
  st.subheader("🎥 Video Files")
723
  vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
@@ -728,9 +652,11 @@ def main():
728
  else:
729
  st.write("No videos found.")
730
 
731
- # --- Tab: Editor
732
  elif tab_main == "📝 Editor":
733
- st.write("Select or create a file to edit. (Currently minimal demo)")
734
 
735
  st.markdown("""
736
  <style>
@@ -744,6 +670,5 @@ def main():
744
  st.session_state.should_rerun = False
745
  st.rerun()
746
 
747
-
748
  if __name__ == "__main__":
749
  main()
 
50
 
51
  # Initialize session state variables
52
  if 'marquee_settings' not in st.session_state:
 
53
  st.session_state['marquee_settings'] = {
54
  "background": "#1E1E1E",
55
  "color": "#FFFFFF",
56
  "font-size": "14px",
57
+ "animationDuration": "20s",
58
  "width": "100%",
59
  "lineHeight": "35px"
60
  }
 
90
  if 'marquee_content' not in st.session_state:
91
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
92
 
 
93
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
94
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
95
  xai_key = os.getenv('xai',"")
 
103
  HF_KEY = os.getenv('HF_KEY')
104
  API_URL = os.getenv('API_URL')
105
 
 
106
  FILE_EMOJIS = {
107
  "md": "📝",
108
  "mp3": "🎵",
 
110
  }
111
 
112
  def get_central_time():
 
113
  central = pytz.timezone('US/Central')
114
  return datetime.now(central)
115
 
116
  def format_timestamp_prefix():
 
117
  ct = get_central_time()
118
  return ct.strftime("%m_%d_%y_%I_%M_%p")
119
 
120
  def initialize_marquee_settings():
 
121
  if 'marquee_settings' not in st.session_state:
122
  st.session_state['marquee_settings'] = {
123
  "background": "#1E1E1E",
 
129
  }
130
 
131
  def get_marquee_settings():
 
132
  initialize_marquee_settings()
133
  return st.session_state['marquee_settings']
134
 
135
  def update_marquee_settings_ui():
 
 
136
  st.sidebar.markdown("### 🎯 Marquee Settings")
137
  cols = st.sidebar.columns(2)
138
  with cols[0]:
 
154
  })
155
 
156
  def display_marquee(text, settings, key_suffix=""):
 
157
  truncated_text = text[:280] + "..." if len(text) > 280 else text
158
  streamlit_marquee(
159
  content=truncated_text,
 
163
  st.write("")
164
 
165
  def get_high_info_terms(text: str, top_n=10) -> list:
 
 
 
166
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
167
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
168
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
 
172
  return [term for term, freq in counter.most_common(top_n)]
173
 
174
  def clean_text_for_filename(text: str) -> str:
 
 
 
175
  text = text.lower()
176
  text = re.sub(r'[^\w\s-]', '', text)
177
  words = text.split()
 
180
  filtered = [w for w in words if len(w) > 3 and w not in stop_short]
181
  return '_'.join(filtered)[:200]
182
 
 
183
  def generate_filename(prompt, response, file_type="md", max_length=200):
 
 
 
 
 
 
 
 
184
  prefix = format_timestamp_prefix() + "_"
185
  combined_text = (prompt + " " + response)[:200]
186
  info_terms = get_high_info_terms(combined_text, top_n=5)
187
  snippet = (prompt[:40] + " " + response[:40]).strip()
188
  snippet_cleaned = clean_text_for_filename(snippet)
189
+ # remove duplicates
 
190
  name_parts = info_terms + [snippet_cleaned]
191
  seen = set()
192
  unique_parts = []
 
202
 
203
  return f"{prefix}{full_name}.{file_type}"
204
 
 
205
  def create_file(prompt, response, file_type="md"):
 
 
 
206
  filename = generate_filename(prompt.strip(), response.strip(), file_type)
207
  with open(filename, 'w', encoding='utf-8') as f:
208
  f.write(prompt + "\n\n" + response)
209
  return filename
210
 
211
  def get_download_link(file, file_type="zip"):
 
 
 
212
  with open(file, "rb") as f:
213
  b64 = base64.b64encode(f.read()).decode()
214
  if file_type == "zip":
 
223
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
224
 
225
  def clean_for_speech(text: str) -> str:
 
 
 
226
  text = text.replace("\n", " ")
227
  text = text.replace("</s>", " ")
228
  text = text.replace("#", "")
 
245
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
246
 
247
  def play_and_download_audio(file_path, file_type="mp3"):
 
248
  if file_path and os.path.exists(file_path):
249
  st.audio(file_path)
250
  dl_link = get_download_link(file_path, file_type=file_type)
251
  st.markdown(dl_link, unsafe_allow_html=True)
252
 
253
  def save_qa_with_audio(question, answer, voice=None):
 
254
  if not voice:
255
  voice = st.session_state['tts_voice']
 
 
256
  combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
257
  md_file = create_file(question, answer, "md")
 
 
258
  audio_text = f"{question}\n\nAnswer: {answer}"
259
  audio_file = speak_with_edge_tts(
260
  audio_text,
261
  voice=voice,
262
  file_format=st.session_state['audio_format']
263
  )
 
264
  return md_file, audio_file
265
 
266
  def parse_arxiv_refs(ref_text: str):
 
 
 
 
267
  if not ref_text:
268
  return []
269
 
 
273
 
274
  for i, line in enumerate(lines):
275
  if line.count('|') == 2:
 
276
  if current_paper:
277
  results.append(current_paper)
278
  if len(results) >= 20:
279
  break
 
280
  try:
281
  header_parts = line.strip('* ').split('|')
282
  date = header_parts[0].strip()
283
  title = header_parts[1].strip()
284
  url_match = re.search(r'(https://arxiv.org/\S+)', line)
285
  url = url_match.group(1) if url_match else f"paper_{len(results)}"
 
286
  current_paper = {
287
  'date': date,
288
  'title': title,
 
296
  st.warning(f"Error parsing paper header: {str(e)}")
297
  current_paper = {}
298
  continue
 
299
  elif current_paper:
 
300
  if not current_paper['authors']:
301
  current_paper['authors'] = line.strip('* ')
302
  else:
 
304
  current_paper['summary'] += ' ' + line.strip()
305
  else:
306
  current_paper['summary'] = line.strip()
 
307
  if current_paper:
308
  results.append(current_paper)
309
 
310
  return results[:20]
311
 
312
  def create_paper_links_md(papers):
 
 
 
 
313
  lines = ["# Paper Links\n"]
314
  for i, p in enumerate(papers, start=1):
 
315
  lines.append(f"{i}. **{p['title']}** — [Arxiv]({p['url']})")
316
  return "\n".join(lines)
317
 
318
  def create_paper_audio_files(papers, input_question):
 
 
 
 
319
  for paper in papers:
320
  try:
 
321
  audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
322
  audio_text = clean_for_speech(audio_text)
323
 
 
329
  )
330
  paper['full_audio'] = audio_file
331
 
 
332
  if audio_file:
333
  with open(audio_file, "rb") as af:
334
  b64_data = base64.b64encode(af.read()).decode()
 
335
  download_filename = os.path.basename(audio_file)
336
  mime_type = "mpeg" if file_format == "mp3" else "wav"
337
  paper['download_base64'] = (
 
345
  paper['download_base64'] = ''
346
 
347
  def display_papers(papers, marquee_settings):
 
 
 
348
  st.write("## Research Papers")
 
349
  for i, paper in enumerate(papers, start=1):
 
350
  marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}"
351
  display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
352
 
 
354
  st.markdown(f"**{paper['date']} | {paper['title']} |** [Arxiv Link]({paper['url']})")
355
  st.markdown(f"*Authors:* {paper['authors']}")
356
  st.markdown(paper['summary'])
 
357
  if paper.get('full_audio'):
358
  st.write("📚 Paper Audio")
359
  st.audio(paper['full_audio'])
 
361
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
362
 
363
  def display_papers_in_sidebar(papers):
 
 
 
 
364
  st.sidebar.title("🎶 Papers & Audio")
365
  for i, paper in enumerate(papers, start=1):
366
  with st.sidebar.expander(f"{i}. {paper['title']}"):
 
369
  st.audio(paper['full_audio'])
370
  if paper['download_base64']:
371
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
 
372
  st.markdown(f"**Authors:** {paper['authors']}")
373
  if paper['summary']:
374
  st.markdown(f"**Summary:** {paper['summary'][:300]}...")
375
 
376
  def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
 
 
 
377
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
378
  all_files = md_files + mp3_files + wav_files
379
  if not all_files:
 
385
  with open(f, 'r', encoding='utf-8') as file:
386
  all_content.append(file.read())
387
  elif f.endswith('.mp3') or f.endswith('.wav'):
 
388
  basename = os.path.splitext(os.path.basename(f))[0]
389
  words = basename.replace('_', ' ')
390
  all_content.append(words)
 
394
  info_terms = get_high_info_terms(combined_content, top_n=10)
395
 
396
  timestamp = format_timestamp_prefix()
397
+ name_text = '-'.join(term for term in info_terms[:5])
 
398
  short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"
399
+
400
  with zipfile.ZipFile(short_zip_name, 'w') as z:
401
  for f in all_files:
402
  z.write(f)
 
403
  return short_zip_name
404
 
 
 
 
405
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
406
  titles_summary=True, full_audio=False):
407
  start = time.time()
 
408
  ai_constitution = """
409
+ You are a talented AI coder and songwriter...
 
 
410
  """
 
411
  # Claude:
412
  client = anthropic.Anthropic(api_key=anthropic_key)
413
  user_input = q
 
414
  response = client.messages.create(
415
  model="claude-3-sonnet-20240229",
416
  max_tokens=1000,
417
  messages=[
418
  {"role": "user", "content": user_input}
419
  ])
 
420
  st.write("Claude's reply 🧠:")
421
  st.markdown(response.content[0].text)
422
 
 
423
  result = response.content[0].text
424
+ create_file(q, result)
425
  md_file, audio_file = save_qa_with_audio(q, result)
426
  st.subheader("📝 Main Response Audio")
427
  play_and_download_audio(audio_file, st.session_state['audio_format'])
428
 
 
429
  # Arxiv:
430
+ st.write("Arxiv's AI this Evening...")
431
+ from gradio_client import Client
432
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
433
  refs = client.predict(q, 20, "Semantic Search",
434
  "mistralai/Mixtral-8x7B-Instruct-v0.1",
435
  api_name="/update_with_rag_md")[0]
 
436
  r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
437
  True, api_name="/ask_llm")
438
 
439
  result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
 
 
440
  md_file, audio_file = save_qa_with_audio(q, result)
 
441
  st.subheader("📝 Main Response Audio")
442
  play_and_download_audio(audio_file, st.session_state['audio_format'])
443
 
444
  papers = parse_arxiv_refs(refs)
445
  if papers:
 
446
  paper_links = create_paper_links_md(papers)
447
  links_file = create_file(q, paper_links, "md")
448
  st.markdown(paper_links)
449
 
 
450
  create_paper_audio_files(papers, input_question=q)
451
  display_papers(papers, get_marquee_settings())
 
 
452
  display_papers_in_sidebar(papers)
453
  else:
454
  st.warning("No papers found in the response.")
 
460
  def process_voice_input(text):
461
  if not text:
462
  return
 
463
  st.subheader("🔍 Search Results")
464
  result = perform_ai_lookup(
465
  text,
 
468
  titles_summary=True,
469
  full_audio=True
470
  )
 
 
471
  md_file, audio_file = save_qa_with_audio(text, result)
 
472
  st.subheader("📝 Generated Files")
473
  st.write(f"Markdown: {md_file}")
474
  st.write(f"Audio: {audio_file}")
475
  play_and_download_audio(audio_file, st.session_state['audio_format'])
476
 
477
+ # ----------------------------------------------------------------------------
478
+ # ADD HERE — FILE HISTORY SIDEBAR
479
+ def display_file_history_in_sidebar():
480
+ """
481
+ Shows a history of each local .md, .mp3, .wav file in descending
482
+ order of modification time, with quick icons and optional download links.
483
+ """
484
+ st.sidebar.markdown("---")
485
+ st.sidebar.markdown("### 📂 File History")
486
+
487
+ # Gather all files of interest
488
+ md_files = glob.glob("*.md")
489
+ mp3_files = glob.glob("*.mp3")
490
+ wav_files = glob.glob("*.wav")
491
+ all_files = md_files + mp3_files + wav_files
492
+
493
+ if not all_files:
494
+ st.sidebar.write("No files found.")
495
+ return
496
+
497
+ # Sort by newest first
498
+ all_files = sorted(all_files, key=os.path.getmtime, reverse=True)
499
+
500
+ for f in all_files:
501
+ fname = os.path.basename(f)
502
+ ext = os.path.splitext(fname)[1].lower().strip('.')
503
+ emoji = FILE_EMOJIS.get(ext, '📦')
504
+ time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
505
+
506
+ with st.sidebar.expander(f"{emoji} {fname}"):
507
+ st.write(f"**Modified:** {time_str}")
508
+ # Optionally show a snippet for .md:
509
+ if ext == "md":
510
+ with open(f, "r", encoding="utf-8") as file_in:
511
+ snippet = file_in.read(200).replace("\n", " ")
512
+ if len(snippet) == 200:
513
+ snippet += "..."
514
+ st.write(snippet)
515
+ st.markdown(get_download_link(f, file_type="md"), unsafe_allow_html=True)
516
+ # If it's audio, let user play it
517
+ elif ext in ["mp3","wav"]:
518
+ st.audio(f)
519
+ st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
520
+ else:
521
+ st.markdown(get_download_link(f), unsafe_allow_html=True)
522
+
523
+ # ----------------------------------------------------------------------------
524
+
525
  def main():
 
526
  update_marquee_settings_ui()
527
  marquee_settings = get_marquee_settings()
 
 
528
  display_marquee(st.session_state['marquee_content'],
529
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
530
  key_suffix="welcome")
531
 
532
+ # -- Insert your main app tabs, logic, etc. --
533
  tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
534
  horizontal=True)
535
 
 
536
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
537
  val = mycomponent(my_input_value="Hello")
538
 
 
539
  if val:
540
  val_stripped = val.replace('\\n', ' ')
541
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
 
542
  run_option = st.selectbox("Model:", ["Arxiv"])
543
  col1, col2 = st.columns(2)
544
  with col1:
 
566
  titles_summary=True,
567
  full_audio=full_audio)
568
 
 
569
  if tab_main == "🔍 ArXiv":
570
  st.subheader("🔍 Query ArXiv")
571
  q = st.text_input("🔍 Query:", key="arxiv_query")
 
572
  st.markdown("### 🎛 Options")
573
  vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
574
  extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
 
583
  if full_transcript:
584
  create_file(q, result, "md")
585
 
 
586
  elif tab_main == "🎤 Voice":
587
  st.subheader("🎤 Voice Input")
 
 
588
  st.markdown("### 🎤 Voice Settings")
589
  selected_voice = st.selectbox(
590
  "Select TTS Voice:",
591
  options=EDGE_TTS_VOICES,
592
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
593
  )
 
594
  st.markdown("### 🔊 Audio Format")
595
  selected_format = st.radio(
596
  "Choose Audio Format:",
 
605
  st.session_state['audio_format'] = selected_format.lower()
606
  st.rerun()
607
 
 
608
  user_text = st.text_area("💬 Message:", height=100)
609
  user_text = user_text.strip().replace('\n', ' ')
610
 
 
616
  st.write("**You:**", c["user"])
617
  st.write("**Response:**", c["claude"])
618
 
 
619
  elif tab_main == "📸 Media":
620
  st.header("📸 Media Gallery")
621
+ tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"]) # audio first
 
622
  with tabs[0]:
623
  st.subheader("🎵 Audio Files")
624
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
 
631
  st.markdown(dl_link, unsafe_allow_html=True)
632
  else:
633
  st.write("No audio files found.")
 
 
634
  with tabs[1]:
635
  st.subheader("🖼 Image Files")
636
  imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
 
642
  st.image(Image.open(f), use_container_width=True)
643
  else:
644
  st.write("No images found.")
 
 
645
  with tabs[2]:
646
  st.subheader("🎥 Video Files")
647
  vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
 
652
  else:
653
  st.write("No videos found.")
654
 
 
655
  elif tab_main == "📝 Editor":
656
+ st.write("Select or create a file to edit. (Currently minimal)")
657
+
658
+ # --- IMPORTANT: Display the file-history in the sidebar
659
+ display_file_history_in_sidebar()
660
 
661
  st.markdown("""
662
  <style>
 
670
  st.session_state.should_rerun = False
671
  st.rerun()
672
 
 
673
  if __name__ == "__main__":
674
  main()