awacke1 committed
Commit dde3eb8 · verified
1 Parent(s): 7104798

Update app.py

Files changed (1)
  1. app.py +153 -26
app.py CHANGED
@@ -21,7 +21,9 @@ import asyncio
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
23
 
24
- # 🎯 1. Core Configuration & Setup
 
 
25
  st.set_page_config(
26
  page_title="🚲TalkingAIResearcher🏆",
27
  page_icon="🚲🏆",
@@ -35,7 +37,7 @@ st.set_page_config(
35
  )
36
  load_dotenv()
37
 
38
- # Add available English voices for Edge TTS
39
  EDGE_TTS_VOICES = [
40
  "en-US-AriaNeural",
41
  "en-US-GuyNeural",
@@ -48,7 +50,7 @@ EDGE_TTS_VOICES = [
48
  "en-CA-LiamNeural"
49
  ]
50
 
51
- # Initialize session state variables
52
  if 'marquee_settings' not in st.session_state:
53
  st.session_state['marquee_settings'] = {
54
  "background": "#1E1E1E",
@@ -61,35 +63,50 @@ if 'marquee_settings' not in st.session_state:
61
 
62
  if 'tts_voice' not in st.session_state:
63
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
 
64
  if 'audio_format' not in st.session_state:
65
  st.session_state['audio_format'] = 'mp3'
 
66
  if 'transcript_history' not in st.session_state:
67
  st.session_state['transcript_history'] = []
 
68
  if 'chat_history' not in st.session_state:
69
  st.session_state['chat_history'] = []
 
70
  if 'openai_model' not in st.session_state:
71
  st.session_state['openai_model'] = "gpt-4o-2024-05-13"
 
72
  if 'messages' not in st.session_state:
73
  st.session_state['messages'] = []
 
74
  if 'last_voice_input' not in st.session_state:
75
  st.session_state['last_voice_input'] = ""
 
76
  if 'editing_file' not in st.session_state:
77
  st.session_state['editing_file'] = None
 
78
  if 'edit_new_name' not in st.session_state:
79
  st.session_state['edit_new_name'] = ""
 
80
  if 'edit_new_content' not in st.session_state:
81
  st.session_state['edit_new_content'] = ""
 
82
  if 'viewing_prefix' not in st.session_state:
83
  st.session_state['viewing_prefix'] = None
 
84
  if 'should_rerun' not in st.session_state:
85
  st.session_state['should_rerun'] = False
 
86
  if 'old_val' not in st.session_state:
87
  st.session_state['old_val'] = None
 
88
  if 'last_query' not in st.session_state:
89
  st.session_state['last_query'] = ""
 
90
  if 'marquee_content' not in st.session_state:
91
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
92
 
 
93
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
94
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
95
  xai_key = os.getenv('xai',"")
@@ -103,17 +120,24 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
103
  HF_KEY = os.getenv('HF_KEY')
104
  API_URL = os.getenv('API_URL')
105
 
 
106
  FILE_EMOJIS = {
107
  "md": "πŸ“",
108
  "mp3": "🎡",
109
  "wav": "πŸ”Š"
110
  }
111
 
 
 
 
 
112
  def get_central_time():
 
113
  central = pytz.timezone('US/Central')
114
  return datetime.now(central)
115
 
116
  def format_timestamp_prefix():
 
117
  ct = get_central_time()
118
  return ct.strftime("%m_%d_%y_%I_%M_%p")
119
 
@@ -133,6 +157,7 @@ def get_marquee_settings():
133
  return st.session_state['marquee_settings']
134
 
135
  def update_marquee_settings_ui():
 
136
  st.sidebar.markdown("### 🎯 Marquee Settings")
137
  cols = st.sidebar.columns(2)
138
  with cols[0]:
@@ -154,6 +179,7 @@ def update_marquee_settings_ui():
154
  })
155
 
156
  def display_marquee(text, settings, key_suffix=""):
 
157
  truncated_text = text[:280] + "..." if len(text) > 280 else text
158
  streamlit_marquee(
159
  content=truncated_text,
@@ -163,6 +189,7 @@ def display_marquee(text, settings, key_suffix=""):
163
  st.write("")
164
 
165
  def get_high_info_terms(text: str, top_n=10) -> list:
 
166
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
167
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
168
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
@@ -172,6 +199,7 @@ def get_high_info_terms(text: str, top_n=10) -> list:
172
  return [term for term, freq in counter.most_common(top_n)]
173
 
174
  def clean_text_for_filename(text: str) -> str:
 
175
  text = text.lower()
176
  text = re.sub(r'[^\w\s-]', '', text)
177
  words = text.split()
@@ -181,11 +209,19 @@ def clean_text_for_filename(text: str) -> str:
181
  return '_'.join(filtered)[:200]
182
 
183
  def generate_filename(prompt, response, file_type="md", max_length=200):
 
 
 
 
 
 
 
184
  prefix = format_timestamp_prefix() + "_"
185
  combined_text = (prompt + " " + response)[:200]
186
  info_terms = get_high_info_terms(combined_text, top_n=5)
187
  snippet = (prompt[:40] + " " + response[:40]).strip()
188
  snippet_cleaned = clean_text_for_filename(snippet)
 
189
  # remove duplicates
190
  name_parts = info_terms + [snippet_cleaned]
191
  seen = set()
@@ -194,8 +230,8 @@ def generate_filename(prompt, response, file_type="md", max_length=200):
194
  if part not in seen:
195
  seen.add(part)
196
  unique_parts.append(part)
197
- full_name = '_'.join(unique_parts).strip('_')
198
 
 
199
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
200
  if len(full_name) > leftover_chars:
201
  full_name = full_name[:leftover_chars]
@@ -203,12 +239,16 @@ def generate_filename(prompt, response, file_type="md", max_length=200):
203
  return f"{prefix}{full_name}.{file_type}"
204
 
205
  def create_file(prompt, response, file_type="md"):
 
206
  filename = generate_filename(prompt.strip(), response.strip(), file_type)
207
  with open(filename, 'w', encoding='utf-8') as f:
208
  f.write(prompt + "\n\n" + response)
209
  return filename
210
 
211
  def get_download_link(file, file_type="zip"):
 
 
 
212
  with open(file, "rb") as f:
213
  b64 = base64.b64encode(f.read()).decode()
214
  if file_type == "zip":
@@ -223,6 +263,7 @@ def get_download_link(file, file_type="zip"):
223
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
224
 
225
  def clean_for_speech(text: str) -> str:
 
226
  text = text.replace("\n", " ")
227
  text = text.replace("</s>", " ")
228
  text = text.replace("#", "")
@@ -231,6 +272,7 @@ def clean_for_speech(text: str) -> str:
231
  return text
232
 
233
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
 
234
  text = clean_for_speech(text)
235
  if not text.strip():
236
  return None
@@ -242,17 +284,21 @@ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=
242
  return out_fn
243
 
244
  def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
 
245
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
246
 
247
  def play_and_download_audio(file_path, file_type="mp3"):
 
248
  if file_path and os.path.exists(file_path):
249
  st.audio(file_path)
250
  dl_link = get_download_link(file_path, file_type=file_type)
251
  st.markdown(dl_link, unsafe_allow_html=True)
252
 
253
  def save_qa_with_audio(question, answer, voice=None):
 
254
  if not voice:
255
  voice = st.session_state['tts_voice']
 
256
  combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
257
  md_file = create_file(question, answer, "md")
258
  audio_text = f"{question}\n\nAnswer: {answer}"
@@ -263,7 +309,15 @@ def save_qa_with_audio(question, answer, voice=None):
263
  )
264
  return md_file, audio_file
265
 
 
 
 
 
266
  def parse_arxiv_refs(ref_text: str):
 
 
 
 
267
  if not ref_text:
268
  return []
269
 
@@ -273,6 +327,7 @@ def parse_arxiv_refs(ref_text: str):
273
 
274
  for i, line in enumerate(lines):
275
  if line.count('|') == 2:
 
276
  if current_paper:
277
  results.append(current_paper)
278
  if len(results) >= 20:
@@ -283,6 +338,7 @@ def parse_arxiv_refs(ref_text: str):
283
  title = header_parts[1].strip()
284
  url_match = re.search(r'(https://arxiv.org/\S+)', line)
285
  url = url_match.group(1) if url_match else f"paper_{len(results)}"
 
286
  current_paper = {
287
  'date': date,
288
  'title': title,
@@ -296,7 +352,9 @@ def parse_arxiv_refs(ref_text: str):
296
  st.warning(f"Error parsing paper header: {str(e)}")
297
  current_paper = {}
298
  continue
 
299
  elif current_paper:
 
300
  if not current_paper['authors']:
301
  current_paper['authors'] = line.strip('* ')
302
  else:
@@ -304,23 +362,28 @@ def parse_arxiv_refs(ref_text: str):
304
  current_paper['summary'] += ' ' + line.strip()
305
  else:
306
  current_paper['summary'] = line.strip()
 
307
  if current_paper:
308
  results.append(current_paper)
309
 
310
  return results[:20]
311
 
312
  def create_paper_links_md(papers):
 
313
  lines = ["# Paper Links\n"]
314
  for i, p in enumerate(papers, start=1):
315
  lines.append(f"{i}. **{p['title']}** β€” [Arxiv]({p['url']})")
316
  return "\n".join(lines)
317
 
318
  def create_paper_audio_files(papers, input_question):
 
 
 
 
319
  for paper in papers:
320
  try:
321
  audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
322
  audio_text = clean_for_speech(audio_text)
323
-
324
  file_format = st.session_state['audio_format']
325
  audio_file = speak_with_edge_tts(
326
  audio_text,
@@ -345,13 +408,14 @@ def create_paper_audio_files(papers, input_question):
345
  paper['download_base64'] = ''
346
 
347
  def display_papers(papers, marquee_settings):
 
348
  st.write("## Research Papers")
349
  for i, paper in enumerate(papers, start=1):
350
  marquee_text = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors'][:120]} | πŸ“ {paper['summary'][:200]}"
351
  display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
352
 
353
  with st.expander(f"{i}. πŸ“„ {paper['title']}", expanded=True):
354
- st.markdown(f"**{paper['date']} | {paper['title']} |** [Arxiv Link]({paper['url']})")
355
  st.markdown(f"*Authors:* {paper['authors']}")
356
  st.markdown(paper['summary'])
357
  if paper.get('full_audio'):
@@ -361,6 +425,7 @@ def display_papers(papers, marquee_settings):
361
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
362
 
363
  def display_papers_in_sidebar(papers):
 
364
  st.sidebar.title("🎢 Papers & Audio")
365
  for i, paper in enumerate(papers, start=1):
366
  with st.sidebar.expander(f"{i}. {paper['title']}"):
@@ -373,7 +438,15 @@ def display_papers_in_sidebar(papers):
373
  if paper['summary']:
374
  st.markdown(f"**Summary:** {paper['summary'][:300]}...")
375
 
 
 
 
 
376
  def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
 
 
 
 
377
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
378
  all_files = md_files + mp3_files + wav_files
379
  if not all_files:
@@ -402,13 +475,19 @@ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
402
  z.write(f)
403
  return short_zip_name
404
 
 
 
 
 
405
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
406
  titles_summary=True, full_audio=False):
 
407
  start = time.time()
408
  ai_constitution = """
409
  You are a talented AI coder and songwriter...
410
  """
411
- # Claude:
 
412
  client = anthropic.Anthropic(api_key=anthropic_key)
413
  user_input = q
414
  response = client.messages.create(
@@ -420,33 +499,45 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
420
  st.write("Claude's reply 🧠:")
421
  st.markdown(response.content[0].text)
422
 
 
423
  result = response.content[0].text
424
  create_file(q, result)
425
  md_file, audio_file = save_qa_with_audio(q, result)
426
  st.subheader("πŸ“ Main Response Audio")
427
  play_and_download_audio(audio_file, st.session_state['audio_format'])
428
 
429
- # Arxiv:
430
- st.write("Arxiv's AI this Evening...")
431
- from gradio_client import Client
432
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
433
- refs = client.predict(q, 20, "Semantic Search",
434
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
435
- api_name="/update_with_rag_md")[0]
436
- r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
437
- True, api_name="/ask_llm")
 
 
 
 
 
 
 
 
 
438
 
439
  result = f"### πŸ”Ž {q}\n\n{r2}\n\n{refs}"
440
  md_file, audio_file = save_qa_with_audio(q, result)
441
  st.subheader("πŸ“ Main Response Audio")
442
  play_and_download_audio(audio_file, st.session_state['audio_format'])
443
 
 
444
  papers = parse_arxiv_refs(refs)
445
  if papers:
 
446
  paper_links = create_paper_links_md(papers)
447
  links_file = create_file(q, paper_links, "md")
448
  st.markdown(paper_links)
449
 
 
450
  create_paper_audio_files(papers, input_question=q)
451
  display_papers(papers, get_marquee_settings())
452
  display_papers_in_sidebar(papers)
@@ -458,6 +549,7 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
458
  return result
459
 
460
  def process_voice_input(text):
 
461
  if not text:
462
  return
463
  st.subheader("πŸ” Search Results")
@@ -474,8 +566,10 @@ def process_voice_input(text):
474
  st.write(f"Audio: {audio_file}")
475
  play_and_download_audio(audio_file, st.session_state['audio_format'])
476
 
477
- # ----------------------------------------------------------------------------
478
- # ADD HERE β€” FILE HISTORY SIDEBAR
 
 
479
  def display_file_history_in_sidebar():
480
  """
481
  Shows a history of each local .md, .mp3, .wav file in descending
@@ -484,7 +578,7 @@ def display_file_history_in_sidebar():
484
  st.sidebar.markdown("---")
485
  st.sidebar.markdown("### πŸ“‚ File History")
486
 
487
- # Gather all files of interest
488
  md_files = glob.glob("*.md")
489
  mp3_files = glob.glob("*.mp3")
490
  wav_files = glob.glob("*.wav")
@@ -494,7 +588,7 @@ def display_file_history_in_sidebar():
494
  st.sidebar.write("No files found.")
495
  return
496
 
497
- # Sort by newest first
498
  all_files = sorted(all_files, key=os.path.getmtime, reverse=True)
499
 
500
  for f in all_files:
@@ -505,7 +599,6 @@ def display_file_history_in_sidebar():
505
 
506
  with st.sidebar.expander(f"{emoji} {fname}"):
507
  st.write(f"**Modified:** {time_str}")
508
- # Optionally show a snippet for .md:
509
  if ext == "md":
510
  with open(f, "r", encoding="utf-8") as file_in:
511
  snippet = file_in.read(200).replace("\n", " ")
@@ -513,26 +606,31 @@ def display_file_history_in_sidebar():
513
  snippet += "..."
514
  st.write(snippet)
515
  st.markdown(get_download_link(f, file_type="md"), unsafe_allow_html=True)
516
- # If it's audio, let user play it
517
  elif ext in ["mp3","wav"]:
518
  st.audio(f)
519
  st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
520
  else:
521
  st.markdown(get_download_link(f), unsafe_allow_html=True)
522
 
523
- # ----------------------------------------------------------------------------
 
 
524
 
525
  def main():
 
526
  update_marquee_settings_ui()
527
  marquee_settings = get_marquee_settings()
 
 
528
  display_marquee(st.session_state['marquee_content'],
529
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
530
  key_suffix="welcome")
531
 
532
- # -- Insert your main app tabs, logic, etc. --
533
  tab_main = st.radio("Action:", ["🎀 Voice", "πŸ“Έ Media", "πŸ” ArXiv", "πŸ“ Editor"],
534
  horizontal=True)
535
 
 
536
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
537
  val = mycomponent(my_input_value="Hello")
538
 
@@ -566,9 +664,13 @@ def main():
566
  titles_summary=True,
567
  full_audio=full_audio)
568
 
 
 
 
569
  if tab_main == "πŸ” ArXiv":
570
  st.subheader("πŸ” Query ArXiv")
571
  q = st.text_input("πŸ” Query:", key="arxiv_query")
 
572
  st.markdown("### πŸŽ› Options")
573
  vocal_summary = st.checkbox("πŸŽ™ShortAudio", value=True, key="option_vocal_summary")
574
  extended_refs = st.checkbox("πŸ“œLongRefs", value=False, key="option_extended_refs")
@@ -583,14 +685,19 @@ def main():
583
  if full_transcript:
584
  create_file(q, result, "md")
585
 
 
 
 
586
  elif tab_main == "🎀 Voice":
587
  st.subheader("🎀 Voice Input")
 
588
  st.markdown("### 🎀 Voice Settings")
589
  selected_voice = st.selectbox(
590
  "Select TTS Voice:",
591
  options=EDGE_TTS_VOICES,
592
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
593
  )
 
594
  st.markdown("### πŸ”Š Audio Format")
595
  selected_format = st.radio(
596
  "Choose Audio Format:",
@@ -598,6 +705,7 @@ def main():
598
  index=0
599
  )
600
 
 
601
  if selected_voice != st.session_state['tts_voice']:
602
  st.session_state['tts_voice'] = selected_voice
603
  st.rerun()
@@ -605,6 +713,7 @@ def main():
605
  st.session_state['audio_format'] = selected_format.lower()
606
  st.rerun()
607
 
 
608
  user_text = st.text_area("πŸ’¬ Message:", height=100)
609
  user_text = user_text.strip().replace('\n', ' ')
610
 
@@ -616,9 +725,16 @@ def main():
616
  st.write("**You:**", c["user"])
617
  st.write("**Response:**", c["claude"])
618
 
 
 
 
619
  elif tab_main == "πŸ“Έ Media":
620
  st.header("πŸ“Έ Media Gallery")
621
- tabs = st.tabs(["🎡 Audio", "πŸ–Ό Images", "πŸŽ₯ Video"]) # audio first
 
 
 
 
622
  with tabs[0]:
623
  st.subheader("🎡 Audio Files")
624
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
@@ -631,6 +747,8 @@ def main():
631
  st.markdown(dl_link, unsafe_allow_html=True)
632
  else:
633
  st.write("No audio files found.")
 
 
634
  with tabs[1]:
635
  st.subheader("πŸ–Ό Image Files")
636
  imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
@@ -642,6 +760,8 @@ def main():
642
  st.image(Image.open(f), use_container_width=True)
643
  else:
644
  st.write("No images found.")
 
 
645
  with tabs[2]:
646
  st.subheader("πŸŽ₯ Video Files")
647
  vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
@@ -652,12 +772,18 @@ def main():
652
  else:
653
  st.write("No videos found.")
654
 
 
 
 
655
  elif tab_main == "πŸ“ Editor":
656
- st.write("Select or create a file to edit. (Currently minimal)")
657
 
658
- # --- IMPORTANT: Display the file-history in the sidebar
 
 
659
  display_file_history_in_sidebar()
660
 
 
661
  st.markdown("""
662
  <style>
663
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
@@ -666,6 +792,7 @@ def main():
666
  </style>
667
  """, unsafe_allow_html=True)
668
 
 
669
  if st.session_state.should_rerun:
670
  st.session_state.should_rerun = False
671
  st.rerun()
 
21
  import edge_tts
22
  from streamlit_marquee import streamlit_marquee
23
 
24
+ # ─────────────────────────────────────────────────────────
25
+ # 1. CORE CONFIGURATION & SETUP
26
+ # ─────────────────────────────────────────────────────────
27
  st.set_page_config(
28
  page_title="🚲TalkingAIResearcher🏆",
29
  page_icon="🚲🏆",
 
37
  )
38
  load_dotenv()
39
 
40
+ # Available English voices for Edge TTS
41
  EDGE_TTS_VOICES = [
42
  "en-US-AriaNeural",
43
  "en-US-GuyNeural",
 
50
  "en-CA-LiamNeural"
51
  ]
52
 
53
+ # Session state variables
54
  if 'marquee_settings' not in st.session_state:
55
  st.session_state['marquee_settings'] = {
56
  "background": "#1E1E1E",
 
63
 
64
  if 'tts_voice' not in st.session_state:
65
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
66
+
67
  if 'audio_format' not in st.session_state:
68
  st.session_state['audio_format'] = 'mp3'
69
+
70
  if 'transcript_history' not in st.session_state:
71
  st.session_state['transcript_history'] = []
72
+
73
  if 'chat_history' not in st.session_state:
74
  st.session_state['chat_history'] = []
75
+
76
  if 'openai_model' not in st.session_state:
77
  st.session_state['openai_model'] = "gpt-4o-2024-05-13"
78
+
79
  if 'messages' not in st.session_state:
80
  st.session_state['messages'] = []
81
+
82
  if 'last_voice_input' not in st.session_state:
83
  st.session_state['last_voice_input'] = ""
84
+
85
  if 'editing_file' not in st.session_state:
86
  st.session_state['editing_file'] = None
87
+
88
  if 'edit_new_name' not in st.session_state:
89
  st.session_state['edit_new_name'] = ""
90
+
91
  if 'edit_new_content' not in st.session_state:
92
  st.session_state['edit_new_content'] = ""
93
+
94
  if 'viewing_prefix' not in st.session_state:
95
  st.session_state['viewing_prefix'] = None
96
+
97
  if 'should_rerun' not in st.session_state:
98
  st.session_state['should_rerun'] = False
99
+
100
  if 'old_val' not in st.session_state:
101
  st.session_state['old_val'] = None
102
+
103
  if 'last_query' not in st.session_state:
104
  st.session_state['last_query'] = ""
105
+
106
  if 'marquee_content' not in st.session_state:
107
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
108
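The long run of per-key "if ... not in st.session_state" checks above amounts to a defaults table; a minimal sketch of that pattern using the same keys (the loop itself is not part of this commit):

import streamlit as st

SESSION_DEFAULTS = {
    "tts_voice": "en-US-AriaNeural",   # mirrors EDGE_TTS_VOICES[0]
    "audio_format": "mp3",
    "transcript_history": [],
    "chat_history": [],
    "messages": [],
    "should_rerun": False,
}

for key, default in SESSION_DEFAULTS.items():
    if key not in st.session_state:    # only set when missing, so reruns keep prior values
        st.session_state[key] = default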
 
109
+ # API Keys
110
  openai_api_key = os.getenv('OPENAI_API_KEY', "")
111
  anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
112
  xai_key = os.getenv('xai',"")
 
120
  HF_KEY = os.getenv('HF_KEY')
121
  API_URL = os.getenv('API_URL')
122
 
123
+ # Helper constants
124
  FILE_EMOJIS = {
125
  "md": "πŸ“",
126
  "mp3": "🎡",
127
  "wav": "πŸ”Š"
128
  }
129
 
130
+ # ─────────────────────────────────────────────────────────
131
+ # 2. HELPER FUNCTIONS
132
+ # ─────────────────────────────────────────────────────────
133
+
134
  def get_central_time():
135
+ """Get current time in US Central timezone."""
136
  central = pytz.timezone('US/Central')
137
  return datetime.now(central)
138
 
139
  def format_timestamp_prefix():
140
+ """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM."""
141
  ct = get_central_time()
142
  return ct.strftime("%m_%d_%y_%I_%M_%p")
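As the new docstring notes, this strftime pattern yields prefixes like the following (illustrative date only):

from datetime import datetime

sample = datetime(2025, 1, 5, 14, 30)
print(sample.strftime("%m_%d_%y_%I_%M_%p"))   # -> 01_05_25_02_30_PM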
143
 
 
157
  return st.session_state['marquee_settings']
158
 
159
  def update_marquee_settings_ui():
160
+ """Add color pickers & sliders for marquee config in sidebar."""
161
  st.sidebar.markdown("### 🎯 Marquee Settings")
162
  cols = st.sidebar.columns(2)
163
  with cols[0]:
 
179
  })
180
 
181
  def display_marquee(text, settings, key_suffix=""):
182
+ """Show marquee text with style from settings."""
183
  truncated_text = text[:280] + "..." if len(text) > 280 else text
184
  streamlit_marquee(
185
  content=truncated_text,
 
189
  st.write("")
190
 
191
  def get_high_info_terms(text: str, top_n=10) -> list:
192
+ """Extract top_n freq words or bigrams (excluding stopwords)."""
193
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
194
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
195
  bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
 
199
  return [term for term, freq in counter.most_common(top_n)]
200
 
201
  def clean_text_for_filename(text: str) -> str:
202
+ """Remove special chars, short words, etc. for filenames."""
203
  text = text.lower()
204
  text = re.sub(r'[^\w\s-]', '', text)
205
  words = text.split()
 
209
  return '_'.join(filtered)[:200]
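An illustrative input/output pair for this cleaner (the output is approximate, since the stop-word and short-word filtering is partly elided from this hunk):

print(clean_text_for_filename("A Survey of LLM-based Agents (2024)!"))
# approximately: survey_llm-based_agents_2024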
210
 
211
  def generate_filename(prompt, response, file_type="md", max_length=200):
212
+ """
213
+ Generate a shortened filename by:
214
+ 1) extracting high-info terms,
215
+ 2) snippet from prompt+response,
216
+ 3) remove duplicates,
217
+ 4) truncate if needed.
218
+ """
219
  prefix = format_timestamp_prefix() + "_"
220
  combined_text = (prompt + " " + response)[:200]
221
  info_terms = get_high_info_terms(combined_text, top_n=5)
222
  snippet = (prompt[:40] + " " + response[:40]).strip()
223
  snippet_cleaned = clean_text_for_filename(snippet)
224
+
225
  # remove duplicates
226
  name_parts = info_terms + [snippet_cleaned]
227
  seen = set()
 
230
  if part not in seen:
231
  seen.add(part)
232
  unique_parts.append(part)
 
233
 
234
+ full_name = '_'.join(unique_parts).strip('_')
235
  leftover_chars = max_length - len(prefix) - len(file_type) - 1
236
  if len(full_name) > leftover_chars:
237
  full_name = full_name[:leftover_chars]
 
239
  return f"{prefix}{full_name}.{file_type}"
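A hypothetical call to make the naming scheme concrete; the inputs and the printed name are illustrative, and the exact terms depend on get_high_info_terms:

name = generate_filename(
    "What is retrieval augmented generation?",
    "Retrieval augmented generation pairs a retriever with a generator...",
    file_type="md",
)
print(name)   # e.g. 01_05_25_02_30_PM_retrieval_augmented_generation_what_is_retrieval_augmented.md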
240
 
241
  def create_file(prompt, response, file_type="md"):
242
+ """Create a text file from prompt + response with sanitized filename."""
243
  filename = generate_filename(prompt.strip(), response.strip(), file_type)
244
  with open(filename, 'w', encoding='utf-8') as f:
245
  f.write(prompt + "\n\n" + response)
246
  return filename
247
 
248
  def get_download_link(file, file_type="zip"):
249
+ """
250
+ Convert a file to base64 and return an HTML link for download.
251
+ """
252
  with open(file, "rb") as f:
253
  b64 = base64.b64encode(f.read()).decode()
254
  if file_type == "zip":
 
263
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
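The same data-URI idea, restated as a standalone helper for reference (the helper name and single mime parameter are mine, not the app's):

import base64, os

def data_uri_download_link(path, mime="application/octet-stream"):
    # Inline the file as base64 so the link needs no separate static file route
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    name = os.path.basename(path)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">Download {name}</a>'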
264
 
265
  def clean_for_speech(text: str) -> str:
266
+ """Clean up text for TTS output."""
267
  text = text.replace("\n", " ")
268
  text = text.replace("</s>", " ")
269
  text = text.replace("#", "")
 
272
  return text
273
 
274
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
275
+ """Async TTS generation with edge-tts library."""
276
  text = clean_for_speech(text)
277
  if not text.strip():
278
  return None
 
284
  return out_fn
285
 
286
  def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
287
+ """Wrapper for the async TTS generate call."""
288
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
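The body of edge_tts_generate_audio is elided from this hunk; a minimal sketch of the edge-tts call it presumably wraps, with rate and pitch in the library's "+N%" / "+NHz" string form:

import asyncio
import edge_tts

async def tts_to_file(text, voice="en-US-AriaNeural", out_fn="speech.mp3"):
    communicate = edge_tts.Communicate(text, voice, rate="+0%", pitch="+0Hz")
    await communicate.save(out_fn)
    return out_fn

# asyncio.run(tts_to_file("Hello from TalkingAIResearcher"))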
289
 
290
  def play_and_download_audio(file_path, file_type="mp3"):
291
+ """Streamlit audio + a quick download link."""
292
  if file_path and os.path.exists(file_path):
293
  st.audio(file_path)
294
  dl_link = get_download_link(file_path, file_type=file_type)
295
  st.markdown(dl_link, unsafe_allow_html=True)
296
 
297
  def save_qa_with_audio(question, answer, voice=None):
298
+ """Save Q&A to markdown and also generate audio."""
299
  if not voice:
300
  voice = st.session_state['tts_voice']
301
+
302
  combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
303
  md_file = create_file(question, answer, "md")
304
  audio_text = f"{question}\n\nAnswer: {answer}"
 
309
  )
310
  return md_file, audio_file
311
 
312
+ # ─────────────────────────────────────────────────────────
313
+ # 3. PAPER PARSING & DISPLAY
314
+ # ─────────────────────────────────────────────────────────
315
+
316
  def parse_arxiv_refs(ref_text: str):
317
+ """
318
+ Given a multi-line markdown with arxiv references, parse them into
319
+ a list of dicts: {date, title, url, authors, summary, ...}.
320
+ """
321
  if not ref_text:
322
  return []
323
 
 
327
 
328
  for i, line in enumerate(lines):
329
  if line.count('|') == 2:
330
+ # Found a new paper line
331
  if current_paper:
332
  results.append(current_paper)
333
  if len(results) >= 20:
 
338
  title = header_parts[1].strip()
339
  url_match = re.search(r'(https://arxiv.org/\S+)', line)
340
  url = url_match.group(1) if url_match else f"paper_{len(results)}"
341
+
342
  current_paper = {
343
  'date': date,
344
  'title': title,
 
352
  st.warning(f"Error parsing paper header: {str(e)}")
353
  current_paper = {}
354
  continue
355
+
356
  elif current_paper:
357
+ # If authors not set, fill it; otherwise, fill summary
358
  if not current_paper['authors']:
359
  current_paper['authors'] = line.strip('* ')
360
  else:
 
362
  current_paper['summary'] += ' ' + line.strip()
363
  else:
364
  current_paper['summary'] = line.strip()
365
+
366
  if current_paper:
367
  results.append(current_paper)
368
 
369
  return results[:20]
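For clarity, the header lines this parser expects are of the form date | title | url (two pipes), followed by an author line and summary lines. A hypothetical input and the rough shape of the output:

ref_text = """10 Jan 2025 | A Sample Paper Title | https://arxiv.org/abs/2501.00001
* A. Author, B. Author
First sentence of the summary.
Second sentence of the summary."""

papers = parse_arxiv_refs(ref_text)
# roughly: [{'date': '10 Jan 2025', 'title': 'A Sample Paper Title',
#            'url': 'https://arxiv.org/abs/2501.00001',
#            'authors': 'A. Author, B. Author',
#            'summary': 'First sentence of the summary. Second sentence of the summary.', ...}]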
370
 
371
  def create_paper_links_md(papers):
372
+ """Creates a minimal .md content linking to each paper's arxiv URL."""
373
  lines = ["# Paper Links\n"]
374
  for i, p in enumerate(papers, start=1):
375
  lines.append(f"{i}. **{p['title']}** β€” [Arxiv]({p['url']})")
376
  return "\n".join(lines)
377
 
378
  def create_paper_audio_files(papers, input_question):
379
+ """
380
+ For each paper, generate TTS audio summary, store the path in `paper['full_audio']`,
381
+ and also store a base64 link for stable downloading.
382
+ """
383
  for paper in papers:
384
  try:
385
  audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
386
  audio_text = clean_for_speech(audio_text)
 
387
  file_format = st.session_state['audio_format']
388
  audio_file = speak_with_edge_tts(
389
  audio_text,
 
408
  paper['download_base64'] = ''
409
 
410
  def display_papers(papers, marquee_settings):
411
+ """Display paper info in the main area with marquee + expanders + audio."""
412
  st.write("## Research Papers")
413
  for i, paper in enumerate(papers, start=1):
414
  marquee_text = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors'][:120]} | πŸ“ {paper['summary'][:200]}"
415
  display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}")
416
 
417
  with st.expander(f"{i}. πŸ“„ {paper['title']}", expanded=True):
418
+ st.markdown(f"**{paper['date']} | {paper['title']}** β€” [Arxiv Link]({paper['url']})")
419
  st.markdown(f"*Authors:* {paper['authors']}")
420
  st.markdown(paper['summary'])
421
  if paper.get('full_audio'):
 
425
  st.markdown(paper['download_base64'], unsafe_allow_html=True)
426
 
427
  def display_papers_in_sidebar(papers):
428
+ """Mirrors the paper listing in the sidebar with expanders, audio, etc."""
429
  st.sidebar.title("🎢 Papers & Audio")
430
  for i, paper in enumerate(papers, start=1):
431
  with st.sidebar.expander(f"{i}. {paper['title']}"):
 
438
  if paper['summary']:
439
  st.markdown(f"**Summary:** {paper['summary'][:300]}...")
440
 
441
+ # ─────────────────────────────────────────────────────────
442
+ # 4. ZIP FUNCTION
443
+ # ─────────────────────────────────────────────────────────
444
+
445
  def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
446
+ """
447
+ Zip up all relevant files, limiting the final zip name to ~20 chars
448
+ to avoid overly long base64 strings.
449
+ """
450
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
451
  all_files = md_files + mp3_files + wav_files
452
  if not all_files:
 
475
  z.write(f)
476
  return short_zip_name
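The archive writing itself is mostly elided from this hunk; it follows the standard zipfile pattern, sketched here with a helper name of my own:

import zipfile

def zip_files(paths, zip_name="bundle.zip"):
    with zipfile.ZipFile(zip_name, "w", zipfile.ZIP_DEFLATED) as z:
        for p in paths:
            z.write(p)
    return zip_name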
477
 
478
+ # ─────────────────────────────────────────────────────────
479
+ # 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
480
+ # ─────────────────────────────────────────────────────────
481
+
482
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
483
  titles_summary=True, full_audio=False):
484
+ """Main routine that uses Anthropic (Claude) + Gradio ArXiv RAG pipeline."""
485
  start = time.time()
486
  ai_constitution = """
487
  You are a talented AI coder and songwriter...
488
  """
489
+
490
+ # --- 1) Claude API
491
  client = anthropic.Anthropic(api_key=anthropic_key)
492
  user_input = q
493
  response = client.messages.create(
 
499
  st.write("Claude's reply 🧠:")
500
  st.markdown(response.content[0].text)
501
 
502
+ # Save & produce audio
503
  result = response.content[0].text
504
  create_file(q, result)
505
  md_file, audio_file = save_qa_with_audio(q, result)
506
  st.subheader("πŸ“ Main Response Audio")
507
  play_and_download_audio(audio_file, st.session_state['audio_format'])
508
 
509
+ # --- 2) Arxiv RAG
510
+ st.write("Arxiv's AI this Evening is Mixtral 8x7B...")
 
511
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
512
+ refs = client.predict(
513
+ q,
514
+ 20,
515
+ "Semantic Search",
516
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
517
+ api_name="/update_with_rag_md"
518
+ )[0]
519
+
520
+ r2 = client.predict(
521
+ q,
522
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
523
+ True,
524
+ api_name="/ask_llm"
525
+ )
526
 
527
  result = f"### πŸ”Ž {q}\n\n{r2}\n\n{refs}"
528
  md_file, audio_file = save_qa_with_audio(q, result)
529
  st.subheader("πŸ“ Main Response Audio")
530
  play_and_download_audio(audio_file, st.session_state['audio_format'])
531
 
532
+ # --- 3) Parse + handle papers
533
  papers = parse_arxiv_refs(refs)
534
  if papers:
535
+ # Create minimal links page first
536
  paper_links = create_paper_links_md(papers)
537
  links_file = create_file(q, paper_links, "md")
538
  st.markdown(paper_links)
539
 
540
+ # Then create audio for each paper
541
  create_paper_audio_files(papers, input_question=q)
542
  display_papers(papers, get_marquee_settings())
543
  display_papers_in_sidebar(papers)
 
549
  return result
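Note that the reformatted Arxiv block above no longer shows the from gradio_client import Client line the old code had inline; presumably it now sits with the top-of-file imports, which this diff does not show. Under that assumption, the two calls stand alone as:

from gradio_client import Client

client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
refs = client.predict(
    "example query", 20, "Semantic Search",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_name="/update_with_rag_md",
)[0]
answer = client.predict(
    "example query",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    True,
    api_name="/ask_llm",
)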
550
 
551
  def process_voice_input(text):
552
+ """When user sends voice query, we run the AI lookup + Q&A with audio."""
553
  if not text:
554
  return
555
  st.subheader("πŸ” Search Results")
 
566
  st.write(f"Audio: {audio_file}")
567
  play_and_download_audio(audio_file, st.session_state['audio_format'])
568
 
569
+ # ─────────────────────────────────────────────────────────
570
+ # 6. FILE HISTORY SIDEBAR
571
+ # ─────────────────────────────────────────────────────────
572
+
573
  def display_file_history_in_sidebar():
574
  """
575
  Shows a history of each local .md, .mp3, .wav file in descending
 
578
  st.sidebar.markdown("---")
579
  st.sidebar.markdown("### πŸ“‚ File History")
580
 
581
+ # Gather all files
582
  md_files = glob.glob("*.md")
583
  mp3_files = glob.glob("*.mp3")
584
  wav_files = glob.glob("*.wav")
 
588
  st.sidebar.write("No files found.")
589
  return
590
 
591
+ # Sort newest first
592
  all_files = sorted(all_files, key=os.path.getmtime, reverse=True)
593
 
594
  for f in all_files:
 
599
 
600
  with st.sidebar.expander(f"{emoji} {fname}"):
601
  st.write(f"**Modified:** {time_str}")
 
602
  if ext == "md":
603
  with open(f, "r", encoding="utf-8") as file_in:
604
  snippet = file_in.read(200).replace("\n", " ")
 
606
  snippet += "..."
607
  st.write(snippet)
608
  st.markdown(get_download_link(f, file_type="md"), unsafe_allow_html=True)
 
609
  elif ext in ["mp3","wav"]:
610
  st.audio(f)
611
  st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
612
  else:
613
  st.markdown(get_download_link(f), unsafe_allow_html=True)
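Outside Streamlit, the file-history listing above reduces to a glob-and-sort-by-mtime pass, roughly:

import glob, os
from datetime import datetime

files = glob.glob("*.md") + glob.glob("*.mp3") + glob.glob("*.wav")
files.sort(key=os.path.getmtime, reverse=True)        # newest first
for f in files:
    mtime = datetime.fromtimestamp(os.path.getmtime(f))
    print(mtime.strftime("%Y-%m-%d %H:%M"), f)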
614
 
615
+ # ─────────────────────────────────────────────────────────
616
+ # 7. MAIN APP
617
+ # ─────────────────────────────────────────────────────────
618
 
619
  def main():
620
+ # 1) Setup marquee UI in the sidebar
621
  update_marquee_settings_ui()
622
  marquee_settings = get_marquee_settings()
623
+
624
+ # 2) Display the marquee welcome
625
  display_marquee(st.session_state['marquee_content'],
626
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
627
  key_suffix="welcome")
628
 
629
+ # 3) Main action tabs
630
  tab_main = st.radio("Action:", ["🎀 Voice", "πŸ“Έ Media", "πŸ” ArXiv", "πŸ“ Editor"],
631
  horizontal=True)
632
 
633
+ # Example custom component usage
634
  mycomponent = components.declare_component("mycomponent", path="mycomponent")
635
  val = mycomponent(my_input_value="Hello")
636
 
 
664
  titles_summary=True,
665
  full_audio=full_audio)
666
 
667
+ # ─────────────────────────────────────────────────────────
668
+ # TAB: ArXiv
669
+ # ─────────────────────────────────────────────────────────
670
  if tab_main == "πŸ” ArXiv":
671
  st.subheader("πŸ” Query ArXiv")
672
  q = st.text_input("πŸ” Query:", key="arxiv_query")
673
+
674
  st.markdown("### πŸŽ› Options")
675
  vocal_summary = st.checkbox("πŸŽ™ShortAudio", value=True, key="option_vocal_summary")
676
  extended_refs = st.checkbox("πŸ“œLongRefs", value=False, key="option_extended_refs")
 
685
  if full_transcript:
686
  create_file(q, result, "md")
687
 
688
+ # ─────────────────────────────────────────────────────────
689
+ # TAB: Voice
690
+ # ─────────────────────────────────────────────────────────
691
  elif tab_main == "🎀 Voice":
692
  st.subheader("🎀 Voice Input")
693
+
694
  st.markdown("### 🎀 Voice Settings")
695
  selected_voice = st.selectbox(
696
  "Select TTS Voice:",
697
  options=EDGE_TTS_VOICES,
698
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
699
  )
700
+
701
  st.markdown("### πŸ”Š Audio Format")
702
  selected_format = st.radio(
703
  "Choose Audio Format:",
 
705
  index=0
706
  )
707
 
708
+ # Update session state if voice/format changes
709
  if selected_voice != st.session_state['tts_voice']:
710
  st.session_state['tts_voice'] = selected_voice
711
  st.rerun()
 
713
  st.session_state['audio_format'] = selected_format.lower()
714
  st.rerun()
715
 
716
+ # Input text
717
  user_text = st.text_area("πŸ’¬ Message:", height=100)
718
  user_text = user_text.strip().replace('\n', ' ')
719
 
 
725
  st.write("**You:**", c["user"])
726
  st.write("**Response:**", c["claude"])
727
 
728
+ # ─────────────────────────────────────────────────────────
729
+ # TAB: Media
730
+ # ─────────────────────────────────────────────────────────
731
  elif tab_main == "πŸ“Έ Media":
732
  st.header("πŸ“Έ Media Gallery")
733
+
734
+ # By default, show audio first
735
+ tabs = st.tabs(["🎡 Audio", "πŸ–Ό Images", "πŸŽ₯ Video"])
736
+
737
+ # AUDIO sub-tab
738
  with tabs[0]:
739
  st.subheader("🎡 Audio Files")
740
  audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
 
747
  st.markdown(dl_link, unsafe_allow_html=True)
748
  else:
749
  st.write("No audio files found.")
750
+
751
+ # IMAGES sub-tab
752
  with tabs[1]:
753
  st.subheader("πŸ–Ό Image Files")
754
  imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
 
760
  st.image(Image.open(f), use_container_width=True)
761
  else:
762
  st.write("No images found.")
763
+
764
+ # VIDEO sub-tab
765
  with tabs[2]:
766
  st.subheader("πŸŽ₯ Video Files")
767
  vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
 
772
  else:
773
  st.write("No videos found.")
774
 
775
+ # ─────────────────────────────────────────────────────────
776
+ # TAB: Editor
777
+ # ─────────────────────────────────────────────────────────
778
  elif tab_main == "πŸ“ Editor":
779
+ st.write("Select or create a file to edit. (Currently minimal demo)")
780
 
781
+ # ─────────────────────────────────────────────────────────
782
+ # SIDEBAR: FILE HISTORY
783
+ # ─────────────────────────────────────────────────────────
784
  display_file_history_in_sidebar()
785
 
786
+ # Some light CSS styling
787
  st.markdown("""
788
  <style>
789
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
 
792
  </style>
793
  """, unsafe_allow_html=True)
794
 
795
+ # Rerun if needed
796
  if st.session_state.should_rerun:
797
  st.session_state.should_rerun = False
798
  st.rerun()
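The diff ends inside main(); the file presumably closes with the usual Streamlit entry point, which is not part of this hunk:

if __name__ == "__main__":
    main()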