awacke1 committed on
Commit
6f62552
·
verified ·
1 Parent(s): 3d49bb6

Update backup13.app.py

Browse files
Files changed (1) hide show
  1. backup13.app.py +233 -223
backup13.app.py CHANGED
@@ -37,8 +37,8 @@ load_dotenv()
37
 
38
  # Add available English voices for Edge TTS
39
  EDGE_TTS_VOICES = [
40
- "en-US-AriaNeural", # Default voice
41
- "en-US-GuyNeural",
42
  "en-US-JennyNeural",
43
  "en-GB-SoniaNeural",
44
  "en-GB-RyanNeural",
@@ -49,6 +49,16 @@ EDGE_TTS_VOICES = [
49
  ]
50
 
51
  # Initialize session state variables
 
 
 
 
 
 
 
 
 
 
52
  if 'tts_voice' not in st.session_state:
53
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
54
  if 'audio_format' not in st.session_state:
@@ -102,26 +112,55 @@ FILE_EMOJIS = {
102
  "wav": "🔊"
103
  }
104
 
105
- # Marquee Functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def get_marquee_settings():
107
- """Get global marquee settings from sidebar controls"""
 
 
 
 
 
 
108
  st.sidebar.markdown("### 🎯 Marquee Settings")
109
  cols = st.sidebar.columns(2)
110
  with cols[0]:
111
- bg_color = st.color_picker("🎨 Background", "#1E1E1E", key="bg_color_picker")
112
- text_color = st.color_picker("✍️ Text", "#FFFFFF", key="text_color_picker")
 
 
 
 
113
  with cols[1]:
114
  font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
115
  duration = st.slider("⏱️ Speed", 1, 20, 10, key="duration_slider")
116
-
117
- return {
118
  "background": bg_color,
119
  "color": text_color,
120
  "font-size": f"{font_size}px",
121
- "animationDuration": f"{duration}s",
122
- "width": "100%",
123
- "lineHeight": "35px"
124
- }
125
 
126
  def display_marquee(text, settings, key_suffix=""):
127
  """Display marquee with given text and settings"""
@@ -133,13 +172,6 @@ def display_marquee(text, settings, key_suffix=""):
133
  )
134
  st.write("")
135
 
136
- def process_paper_content(paper):
137
- """Process paper content for marquee and audio"""
138
- marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:100]} | 📝 {paper['summary'][:100]}"
139
- audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
140
- return marquee_text, audio_text
141
-
142
- # Text Processing Functions
143
  def get_high_info_terms(text: str, top_n=10) -> list:
144
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
145
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
@@ -157,17 +189,8 @@ def clean_text_for_filename(text: str) -> str:
157
  filtered = [w for w in words if len(w) > 3 and w not in stop_short]
158
  return '_'.join(filtered)[:200]
159
 
160
- def clean_for_speech(text: str) -> str:
161
- text = text.replace("\n", " ")
162
- text = text.replace("</s>", " ")
163
- text = text.replace("#", "")
164
- text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
165
- text = re.sub(r"\s+", " ", text).strip()
166
- return text
167
-
168
- # File Operations
169
  def generate_filename(prompt, response, file_type="md"):
170
- prefix = datetime.now().strftime("%y%m_%H%M") + "_"
171
  combined = (prompt + " " + response).strip()
172
  info_terms = get_high_info_terms(combined, top_n=10)
173
  snippet = (prompt[:100] + " " + response[:100]).strip()
@@ -198,7 +221,14 @@ def get_download_link(file, file_type="zip"):
198
  else:
199
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
200
 
201
- # Audio Processing
 
 
 
 
 
 
 
202
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
203
  text = clean_for_speech(text)
204
  if not text.strip():
@@ -219,7 +249,74 @@ def play_and_download_audio(file_path, file_type="mp3"):
219
  dl_link = get_download_link(file_path, file_type=file_type)
220
  st.markdown(dl_link, unsafe_allow_html=True)
221
 
222
- # Paper Processing Functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  def parse_arxiv_refs(ref_text: str):
224
  if not ref_text:
225
  return []
@@ -269,58 +366,10 @@ def parse_arxiv_refs(ref_text: str):
269
 
270
  return results[:20]
271
 
272
- def create_paper_audio_files(papers, input_question):
273
- for paper in papers:
274
- try:
275
- marquee_text, audio_text = process_paper_content(paper)
276
-
277
- audio_text = clean_for_speech(audio_text)
278
- file_format = st.session_state['audio_format']
279
- audio_file = speak_with_edge_tts(audio_text,
280
- voice=st.session_state['tts_voice'],
281
- file_format=file_format)
282
- paper['full_audio'] = audio_file
283
-
284
- st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
285
- play_and_download_audio(audio_file, file_type=file_format)
286
- paper['marquee_text'] = marquee_text
287
-
288
- except Exception as e:
289
- st.warning(f"Error processing paper {paper['title']}: {str(e)}")
290
- paper['full_audio'] = None
291
- paper['marquee_text'] = None
292
-
293
- def display_papers(papers, marquee_settings):
294
- """Display papers with their audio controls and marquee summaries"""
295
- st.write("## Research Papers")
296
-
297
- papercount = 0
298
- for paper in papers:
299
- papercount += 1
300
- if papercount <= 20:
301
- # Display marquee if text exists
302
- if paper.get('marquee_text'):
303
- display_marquee(paper['marquee_text'],
304
- marquee_settings,
305
- key_suffix=f"paper_{papercount}")
306
-
307
- with st.expander(f"{papercount}. 📄 {paper['title']}", expanded=True):
308
- st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
309
- st.markdown(f"*{paper['authors']}*")
310
- st.markdown(paper['summary'])
311
-
312
- if paper.get('full_audio'):
313
- st.write("📚 Paper Audio")
314
- file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
315
- if file_ext in ['mp3', 'wav']:
316
- st.audio(paper['full_audio'])
317
-
318
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
319
- titles_summary=True, full_audio=False, marquee_settings=None):
320
- """Perform Arxiv search with audio generation per paper."""
321
  start = time.time()
322
 
323
- # Query the HF RAG pipeline
324
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
325
  refs = client.predict(q, 20, "Semantic Search",
326
  "mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -328,124 +377,76 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
328
  r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
329
  True, api_name="/ask_llm")
330
 
331
- # Combine for final text output
332
  result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
333
  st.markdown(result)
334
 
335
- # Parse and process papers
 
 
 
 
336
  papers = parse_arxiv_refs(refs)
337
  if papers:
338
  create_paper_audio_files(papers, input_question=q)
339
- if marquee_settings:
340
- display_papers(papers, marquee_settings)
341
- else:
342
- display_papers(papers, get_marquee_settings())
343
  else:
344
  st.warning("No papers found in the response.")
345
 
346
  elapsed = time.time()-start
347
  st.write(f"**Total Elapsed:** {elapsed:.2f} s")
348
-
349
- # Save full transcript
350
- create_file(q, result, "md")
351
  return result
352
 
353
- def process_with_gpt(text):
354
- """Process text with GPT-4"""
355
- if not text:
356
  return
357
- st.session_state.messages.append({"role":"user","content":text})
358
- with st.chat_message("user"):
359
- st.markdown(text)
360
- with st.chat_message("assistant"):
361
- c = openai_client.chat.completions.create(
362
- model=st.session_state["openai_model"],
363
- messages=st.session_state.messages,
364
- stream=False
365
- )
366
- ans = c.choices[0].message.content
367
- st.write("GPT-4o: " + ans)
368
- create_file(text, ans, "md")
369
- st.session_state.messages.append({"role":"assistant","content":ans})
370
- return ans
371
-
372
- def process_with_claude(text):
373
- """Process text with Claude"""
374
- if not text:
375
- return
376
- with st.chat_message("user"):
377
- st.markdown(text)
378
- with st.chat_message("assistant"):
379
- r = claude_client.messages.create(
380
- model="claude-3-sonnet-20240229",
381
- max_tokens=1000,
382
- messages=[{"role":"user","content":text}]
383
- )
384
- ans = r.content[0].text
385
- st.write("Claude-3.5: " + ans)
386
- create_file(text, ans, "md")
387
- st.session_state.chat_history.append({"user":text,"claude":ans})
388
- return ans
389
 
390
  def load_files_for_sidebar():
391
- """Load and group files for sidebar display based on first 9 characters of filename"""
392
  md_files = glob.glob("*.md")
393
  mp3_files = glob.glob("*.mp3")
394
  wav_files = glob.glob("*.wav")
395
-
396
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
397
  all_files = md_files + mp3_files + wav_files
398
 
399
  groups = defaultdict(list)
 
 
400
  for f in all_files:
401
  basename = os.path.basename(f)
402
- group_name = basename[:9] if len(basename) >= 9 else 'Other'
403
- groups[group_name].append(f)
404
-
 
 
 
405
  sorted_groups = sorted(groups.items(),
406
- key=lambda x: max(os.path.getmtime(f) for f in x[1]),
407
  reverse=True)
408
  return sorted_groups
409
 
410
- def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
411
- """Create zip with intelligent naming based on high-info terms"""
412
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
413
- all_files = md_files + mp3_files + wav_files
414
- if not all_files:
415
- return None
416
-
417
- all_content = []
418
- for f in all_files:
419
- if f.endswith('.md'):
420
- with open(f, 'r', encoding='utf-8') as file:
421
- all_content.append(file.read())
422
- elif f.endswith('.mp3') or f.endswith('.wav'):
423
- basename = os.path.splitext(os.path.basename(f))[0]
424
- words = basename.replace('_', ' ')
425
- all_content.append(words)
426
-
427
- all_content.append(input_question)
428
- combined_content = " ".join(all_content)
429
- info_terms = get_high_info_terms(combined_content, top_n=10)
430
-
431
- timestamp = datetime.now().strftime("%y%m_%H%M")
432
- name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:10])
433
- zip_name = f"{timestamp}_{name_text}.zip"
434
-
435
- with zipfile.ZipFile(zip_name, 'w') as z:
436
- for f in all_files:
437
- z.write(f)
438
-
439
- return zip_name
440
-
441
  def display_file_manager_sidebar(groups_sorted):
442
- """Display file manager in sidebar with timestamp-based groups"""
443
  st.sidebar.title("🎵 Audio & Docs Manager")
444
 
445
  all_md = []
446
  all_mp3 = []
447
  all_wav = []
448
- for group_name, files in groups_sorted:
449
  for f in files:
450
  if f.endswith(".md"):
451
  all_md.append(f)
@@ -454,55 +455,91 @@ def display_file_manager_sidebar(groups_sorted):
454
  elif f.endswith(".wav"):
455
  all_wav.append(f)
456
 
457
- top_bar = st.sidebar.columns(4)
458
- with top_bar[0]:
459
- if st.button("🗑 DelAllMD"):
460
  for f in all_md:
461
  os.remove(f)
462
  st.session_state.should_rerun = True
463
- with top_bar[1]:
464
- if st.button("🗑 DelAllMP3"):
465
  for f in all_mp3:
466
  os.remove(f)
467
  st.session_state.should_rerun = True
468
- with top_bar[2]:
469
- if st.button("🗑 DelAllWAV"):
470
  for f in all_wav:
471
  os.remove(f)
472
  st.session_state.should_rerun = True
473
- with top_bar[3]:
474
  if st.button("⬇️ ZipAll"):
475
- zip_name = create_zip_of_files(all_md, all_mp3, all_wav,
476
- input_question=st.session_state.get('last_query', ''))
477
  if zip_name:
478
- st.sidebar.markdown(get_download_link(zip_name, file_type="zip"),
479
- unsafe_allow_html=True)
480
 
481
  for group_name, files in groups_sorted:
482
- timestamp_dt = datetime.strptime(group_name, "%y%m_%H%M") if len(group_name) == 9 else None
483
- group_label = timestamp_dt.strftime("%Y-%m-%d %H:%M") if timestamp_dt else group_name
484
-
 
 
 
 
 
 
485
  with st.sidebar.expander(f"📁 {group_label} ({len(files)})", expanded=True):
486
  c1, c2 = st.columns(2)
487
  with c1:
488
- if st.button("👀ViewGrp", key="view_group_"+group_name):
489
  st.session_state.viewing_prefix = group_name
490
  with c2:
491
- if st.button("🗑DelGrp", key="del_group_"+group_name):
492
  for f in files:
493
  os.remove(f)
494
- st.success(f"Deleted group {group_name}!")
495
  st.session_state.should_rerun = True
496
-
497
  for f in files:
498
  fname = os.path.basename(f)
499
  ext = os.path.splitext(fname)[1].lower()
500
  emoji = FILE_EMOJIS.get(ext.strip('.'), '')
501
- ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%H:%M:%S")
 
502
  st.write(f"{emoji} **{fname}** - {ctime}")
503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  def main():
505
- # Get marquee settings first
 
506
  marquee_settings = get_marquee_settings()
507
 
508
  # Initial welcome marquee
@@ -556,7 +593,7 @@ def main():
556
  val_stripped = val.replace('\\n', ' ')
557
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
558
 
559
- run_option = st.selectbox("Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
560
  col1, col2 = st.columns(2)
561
  with col1:
562
  autorun = st.checkbox("⚙ AutoRun", value=True)
@@ -568,30 +605,15 @@ def main():
568
  if autorun and input_changed:
569
  st.session_state.old_val = val
570
  st.session_state.last_query = edited_input
571
- if run_option == "Arxiv":
572
- perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
573
- titles_summary=True, full_audio=full_audio,
574
- marquee_settings=marquee_settings)
575
- else:
576
- if run_option == "GPT-4o":
577
- process_with_gpt(edited_input)
578
- elif run_option == "Claude-3.5":
579
- process_with_claude(edited_input)
580
  else:
581
  if st.button("▶ Run"):
582
  st.session_state.old_val = val
583
  st.session_state.last_query = edited_input
584
- if run_option == "Arxiv":
585
- perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
586
- titles_summary=True, full_audio=full_audio,
587
- marquee_settings=marquee_settings)
588
- else:
589
- if run_option == "GPT-4o":
590
- process_with_gpt(edited_input)
591
- elif run_option == "Claude-3.5":
592
- process_with_claude(edited_input)
593
 
594
- # ArXiv Tab
595
  if tab_main == "🔍 ArXiv":
596
  st.subheader("🔍 Query ArXiv")
597
  q = st.text_input("🔍 Query:")
@@ -606,32 +628,23 @@ def main():
606
  if q and st.button("🔍Run"):
607
  st.session_state.last_query = q
608
  result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
609
- titles_summary=titles_summary, full_audio=full_audio,
610
- marquee_settings=marquee_settings)
611
  if full_transcript:
612
  create_file(q, result, "md")
613
 
614
- # Voice Tab
615
  elif tab_main == "🎤 Voice":
616
  st.subheader("🎤 Voice Input")
617
  user_text = st.text_area("💬 Message:", height=100)
618
  user_text = user_text.strip().replace('\n', ' ')
619
 
620
  if st.button("📨 Send"):
621
- process_with_gpt(user_text)
622
 
623
  st.subheader("📜 Chat History")
624
- t1, t2 = st.tabs(["Claude History", "GPT-4o History"])
625
- with t1:
626
- for c in st.session_state.chat_history:
627
- st.write("**You:**", c["user"])
628
- st.write("**Claude:**", c["claude"])
629
- with t2:
630
- for m in st.session_state.messages:
631
- with st.chat_message(m["role"]):
632
- st.markdown(m["content"])
633
-
634
- # Media Tab
635
  elif tab_main == "📸 Media":
636
  st.header("📸 Images & 🎥 Videos")
637
  tabs = st.tabs(["🖼 Images", "🎥 Video"])
@@ -681,7 +694,6 @@ def main():
681
  else:
682
  st.write("No videos found.")
683
 
684
- # Editor Tab
685
  elif tab_main == "📝 Editor":
686
  if st.session_state.editing_file:
687
  st.subheader(f"Editing: {st.session_state.editing_file}")
@@ -720,7 +732,6 @@ def main():
720
  st.session_state.viewing_prefix = None
721
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
722
 
723
- # Add custom CSS
724
  st.markdown("""
725
  <style>
726
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
@@ -729,10 +740,9 @@ def main():
729
  </style>
730
  """, unsafe_allow_html=True)
731
 
732
- # Handle rerun if needed
733
  if st.session_state.should_rerun:
734
  st.session_state.should_rerun = False
735
  st.rerun()
736
 
737
  if __name__ == "__main__":
738
- main()
 
37
 
38
  # Add available English voices for Edge TTS
39
  EDGE_TTS_VOICES = [
40
+ "en-US-AriaNeural",
41
+ "en-US-GuyNeural",
42
  "en-US-JennyNeural",
43
  "en-GB-SoniaNeural",
44
  "en-GB-RyanNeural",
 
49
  ]
50
 
51
  # Initialize session state variables
52
+ if 'marquee_settings' not in st.session_state:
53
+ st.session_state['marquee_settings'] = {
54
+ "background": "#1E1E1E",
55
+ "color": "#FFFFFF",
56
+ "font-size": "14px",
57
+ "animationDuration": "10s",
58
+ "width": "100%",
59
+ "lineHeight": "35px"
60
+ }
61
+
62
  if 'tts_voice' not in st.session_state:
63
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
64
  if 'audio_format' not in st.session_state:
 
112
  "wav": "🔊"
113
  }
114
 
115
+ def get_central_time():
116
+ """Get current time in US Central timezone"""
117
+ central = pytz.timezone('US/Central')
118
+ return datetime.now(central)
119
+
120
+ def format_timestamp_prefix():
121
+ """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM"""
122
+ ct = get_central_time()
123
+ return ct.strftime("%m_%d_%y_%I_%M_%p")
124
+
125
+ def initialize_marquee_settings():
126
+ """Initialize marquee settings in session state"""
127
+ if 'marquee_settings' not in st.session_state:
128
+ st.session_state['marquee_settings'] = {
129
+ "background": "#1E1E1E",
130
+ "color": "#FFFFFF",
131
+ "font-size": "14px",
132
+ "animationDuration": "10s",
133
+ "width": "100%",
134
+ "lineHeight": "35px"
135
+ }
136
+
137
  def get_marquee_settings():
138
+ """Get or update marquee settings from session state"""
139
+ initialize_marquee_settings()
140
+ return st.session_state['marquee_settings']
141
+
142
+ def update_marquee_settings_ui():
143
+ """Update marquee settings via UI controls"""
144
+ initialize_marquee_settings()
145
  st.sidebar.markdown("### 🎯 Marquee Settings")
146
  cols = st.sidebar.columns(2)
147
  with cols[0]:
148
+ bg_color = st.color_picker("🎨 Background",
149
+ st.session_state['marquee_settings']["background"],
150
+ key="bg_color_picker")
151
+ text_color = st.color_picker("✍️ Text",
152
+ st.session_state['marquee_settings']["color"],
153
+ key="text_color_picker")
154
  with cols[1]:
155
  font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
156
  duration = st.slider("⏱️ Speed", 1, 20, 10, key="duration_slider")
157
+
158
+ st.session_state['marquee_settings'].update({
159
  "background": bg_color,
160
  "color": text_color,
161
  "font-size": f"{font_size}px",
162
+ "animationDuration": f"{duration}s"
163
+ })
 
 
164
 
165
  def display_marquee(text, settings, key_suffix=""):
166
  """Display marquee with given text and settings"""
 
172
  )
173
  st.write("")
174
 
 
 
 
 
 
 
 
175
  def get_high_info_terms(text: str, top_n=10) -> list:
176
  stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
177
  words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
 
189
  filtered = [w for w in words if len(w) > 3 and w not in stop_short]
190
  return '_'.join(filtered)[:200]
191
 
 
 
 
 
 
 
 
 
 
192
  def generate_filename(prompt, response, file_type="md"):
193
+ prefix = format_timestamp_prefix() + "_"
194
  combined = (prompt + " " + response).strip()
195
  info_terms = get_high_info_terms(combined, top_n=10)
196
  snippet = (prompt[:100] + " " + response[:100]).strip()
 
221
  else:
222
  return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
223
 
224
+ def clean_for_speech(text: str) -> str:
225
+ text = text.replace("\n", " ")
226
+ text = text.replace("</s>", " ")
227
+ text = text.replace("#", "")
228
+ text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
229
+ text = re.sub(r"\s+", " ", text).strip()
230
+ return text
231
+
232
  async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
233
  text = clean_for_speech(text)
234
  if not text.strip():
 
249
  dl_link = get_download_link(file_path, file_type=file_type)
250
  st.markdown(dl_link, unsafe_allow_html=True)
251
 
252
+ def save_qa_with_audio(question, answer, voice=None):
253
+ """Save Q&A to markdown and generate audio"""
254
+ if not voice:
255
+ voice = st.session_state['tts_voice']
256
+
257
+ # Create markdown file
258
+ combined_text = f"# Question\n{question}\n\n# Answer\n{answer}"
259
+ md_file = create_file(question, answer, "md")
260
+
261
+ # Generate audio file
262
+ audio_text = f"Question: {question}\n\nAnswer: {answer}"
263
+ audio_file = speak_with_edge_tts(
264
+ audio_text,
265
+ voice=voice,
266
+ file_format=st.session_state['audio_format']
267
+ )
268
+
269
+ return md_file, audio_file
270
+
271
+ def process_paper_content(paper):
272
+ marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:100]} | 📝 {paper['summary'][:100]}"
273
+ audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
274
+ return marquee_text, audio_text
275
+
276
+ def create_paper_audio_files(papers, input_question):
277
+ for paper in papers:
278
+ try:
279
+ marquee_text, audio_text = process_paper_content(paper)
280
+
281
+ audio_text = clean_for_speech(audio_text)
282
+ file_format = st.session_state['audio_format']
283
+ audio_file = speak_with_edge_tts(audio_text,
284
+ voice=st.session_state['tts_voice'],
285
+ file_format=file_format)
286
+ paper['full_audio'] = audio_file
287
+
288
+ st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
289
+ play_and_download_audio(audio_file, file_type=file_format)
290
+ paper['marquee_text'] = marquee_text
291
+
292
+ except Exception as e:
293
+ st.warning(f"Error processing paper {paper['title']}: {str(e)}")
294
+ paper['full_audio'] = None
295
+ paper['marquee_text'] = None
296
+
297
+ def display_papers(papers, marquee_settings):
298
+ st.write("## Research Papers")
299
+
300
+ papercount = 0
301
+ for paper in papers:
302
+ papercount += 1
303
+ if papercount <= 20:
304
+ if paper.get('marquee_text'):
305
+ display_marquee(paper['marquee_text'],
306
+ marquee_settings,
307
+ key_suffix=f"paper_{papercount}")
308
+
309
+ with st.expander(f"{papercount}. 📄 {paper['title']}", expanded=True):
310
+ st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
311
+ st.markdown(f"*{paper['authors']}*")
312
+ st.markdown(paper['summary'])
313
+
314
+ if paper.get('full_audio'):
315
+ st.write("📚 Paper Audio")
316
+ file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
317
+ if file_ext in ['mp3', 'wav']:
318
+ st.audio(paper['full_audio'])
319
+
320
  def parse_arxiv_refs(ref_text: str):
321
  if not ref_text:
322
  return []
 
366
 
367
  return results[:20]
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
370
+ titles_summary=True, full_audio=False):
 
371
  start = time.time()
372
 
 
373
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
374
  refs = client.predict(q, 20, "Semantic Search",
375
  "mistralai/Mixtral-8x7B-Instruct-v0.1",
 
377
  r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
378
  True, api_name="/ask_llm")
379
 
 
380
  result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
381
  st.markdown(result)
382
 
383
+ md_file, audio_file = save_qa_with_audio(q, result)
384
+
385
+ st.subheader("📝 Main Response Audio")
386
+ play_and_download_audio(audio_file, st.session_state['audio_format'])
387
+
388
  papers = parse_arxiv_refs(refs)
389
  if papers:
390
  create_paper_audio_files(papers, input_question=q)
391
+ display_papers(papers, get_marquee_settings())
 
 
 
392
  else:
393
  st.warning("No papers found in the response.")
394
 
395
  elapsed = time.time()-start
396
  st.write(f"**Total Elapsed:** {elapsed:.2f} s")
 
 
 
397
  return result
398
 
399
+ def process_voice_input(text):
400
+ if not text:
 
401
  return
402
+
403
+ st.subheader("🔍 Search Results")
404
+ result = perform_ai_lookup(
405
+ text,
406
+ vocal_summary=True,
407
+ extended_refs=False,
408
+ titles_summary=True,
409
+ full_audio=True
410
+ )
411
+
412
+ md_file, audio_file = save_qa_with_audio(text, result)
413
+
414
+ st.subheader("📝 Generated Files")
415
+ st.write(f"Markdown: {md_file}")
416
+ st.write(f"Audio: {audio_file}")
417
+ play_and_download_audio(audio_file, st.session_state['audio_format'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
  def load_files_for_sidebar():
 
420
  md_files = glob.glob("*.md")
421
  mp3_files = glob.glob("*.mp3")
422
  wav_files = glob.glob("*.wav")
423
+
424
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
425
  all_files = md_files + mp3_files + wav_files
426
 
427
  groups = defaultdict(list)
428
+ prefix_length = len("MM_dd_yy_hh_mm_AP")
429
+
430
  for f in all_files:
431
  basename = os.path.basename(f)
432
+ if len(basename) >= prefix_length and '_' in basename:
433
+ group_name = basename[:prefix_length]
434
+ groups[group_name].append(f)
435
+ else:
436
+ groups['Other'].append(f)
437
+
438
  sorted_groups = sorted(groups.items(),
439
+ key=lambda x: x[0] if x[0] != 'Other' else '',
440
  reverse=True)
441
  return sorted_groups
442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  def display_file_manager_sidebar(groups_sorted):
 
444
  st.sidebar.title("🎵 Audio & Docs Manager")
445
 
446
  all_md = []
447
  all_mp3 = []
448
  all_wav = []
449
+ for _, files in groups_sorted:
450
  for f in files:
451
  if f.endswith(".md"):
452
  all_md.append(f)
 
455
  elif f.endswith(".wav"):
456
  all_wav.append(f)
457
 
458
+ col1, col2, col3, col4 = st.sidebar.columns(4)
459
+ with col1:
460
+ if st.button("🗑 DelMD"):
461
  for f in all_md:
462
  os.remove(f)
463
  st.session_state.should_rerun = True
464
+ with col2:
465
+ if st.button("🗑 DelMP3"):
466
  for f in all_mp3:
467
  os.remove(f)
468
  st.session_state.should_rerun = True
469
+ with col3:
470
+ if st.button("🗑 DelWAV"):
471
  for f in all_wav:
472
  os.remove(f)
473
  st.session_state.should_rerun = True
474
+ with col4:
475
  if st.button("⬇️ ZipAll"):
476
+ zip_name = create_zip_of_files(all_md, all_mp3, all_wav, st.session_state.get('last_query', ''))
 
477
  if zip_name:
478
+ st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
 
479
 
480
  for group_name, files in groups_sorted:
481
+ if group_name == 'Other':
482
+ group_label = 'Other Files'
483
+ else:
484
+ try:
485
+ timestamp_dt = datetime.strptime(group_name, "%m_%d_%y_%I_%M_%p")
486
+ group_label = timestamp_dt.strftime("%b %d, %Y %I:%M %p")
487
+ except ValueError:
488
+ group_label = group_name
489
+
490
  with st.sidebar.expander(f"📁 {group_label} ({len(files)})", expanded=True):
491
  c1, c2 = st.columns(2)
492
  with c1:
493
+ if st.button("👀 View", key=f"view_group_{group_name}"):
494
  st.session_state.viewing_prefix = group_name
495
  with c2:
496
+ if st.button("🗑 Del", key=f"del_group_{group_name}"):
497
  for f in files:
498
  os.remove(f)
499
+ st.success(f"Deleted group {group_label}!")
500
  st.session_state.should_rerun = True
501
+
502
  for f in files:
503
  fname = os.path.basename(f)
504
  ext = os.path.splitext(fname)[1].lower()
505
  emoji = FILE_EMOJIS.get(ext.strip('.'), '')
506
+ mtime = os.path.getmtime(f)
507
+ ctime = datetime.fromtimestamp(mtime).strftime("%I:%M:%S %p")
508
  st.write(f"{emoji} **{fname}** - {ctime}")
509
 
510
+ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
511
+ md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
512
+ all_files = md_files + mp3_files + wav_files
513
+ if not all_files:
514
+ return None
515
+
516
+ all_content = []
517
+ for f in all_files:
518
+ if f.endswith('.md'):
519
+ with open(f, 'r', encoding='utf-8') as file:
520
+ all_content.append(file.read())
521
+ elif f.endswith('.mp3') or f.endswith('.wav'):
522
+ basename = os.path.splitext(os.path.basename(f))[0]
523
+ words = basename.replace('_', ' ')
524
+ all_content.append(words)
525
+
526
+ all_content.append(input_question)
527
+ combined_content = " ".join(all_content)
528
+ info_terms = get_high_info_terms(combined_content, top_n=10)
529
+
530
+ timestamp = format_timestamp_prefix()
531
+ name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:10])
532
+ zip_name = f"{timestamp}_{name_text}.zip"
533
+
534
+ with zipfile.ZipFile(zip_name, 'w') as z:
535
+ for f in all_files:
536
+ z.write(f)
537
+
538
+ return zip_name
539
+
540
  def main():
541
+ # Update marquee settings UI first
542
+ update_marquee_settings_ui()
543
  marquee_settings = get_marquee_settings()
544
 
545
  # Initial welcome marquee
 
593
  val_stripped = val.replace('\\n', ' ')
594
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
595
 
596
+ run_option = st.selectbox("Model:", ["Arxiv"])
597
  col1, col2 = st.columns(2)
598
  with col1:
599
  autorun = st.checkbox("⚙ AutoRun", value=True)
 
605
  if autorun and input_changed:
606
  st.session_state.old_val = val
607
  st.session_state.last_query = edited_input
608
+ result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
609
+ titles_summary=True, full_audio=full_audio)
 
 
 
 
 
 
 
610
  else:
611
  if st.button("▶ Run"):
612
  st.session_state.old_val = val
613
  st.session_state.last_query = edited_input
614
+ result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
615
+ titles_summary=True, full_audio=full_audio)
 
 
 
 
 
 
 
616
 
 
617
  if tab_main == "🔍 ArXiv":
618
  st.subheader("🔍 Query ArXiv")
619
  q = st.text_input("🔍 Query:")
 
628
  if q and st.button("🔍Run"):
629
  st.session_state.last_query = q
630
  result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
631
+ titles_summary=titles_summary, full_audio=full_audio)
 
632
  if full_transcript:
633
  create_file(q, result, "md")
634
 
 
635
  elif tab_main == "🎤 Voice":
636
  st.subheader("🎤 Voice Input")
637
  user_text = st.text_area("💬 Message:", height=100)
638
  user_text = user_text.strip().replace('\n', ' ')
639
 
640
  if st.button("📨 Send"):
641
+ process_voice_input(user_text)
642
 
643
  st.subheader("📜 Chat History")
644
+ for c in st.session_state.chat_history:
645
+ st.write("**You:**", c["user"])
646
+ st.write("**Response:**", c["claude"])
647
+
 
 
 
 
 
 
 
648
  elif tab_main == "📸 Media":
649
  st.header("📸 Images & 🎥 Videos")
650
  tabs = st.tabs(["🖼 Images", "🎥 Video"])
 
694
  else:
695
  st.write("No videos found.")
696
 
 
697
  elif tab_main == "📝 Editor":
698
  if st.session_state.editing_file:
699
  st.subheader(f"Editing: {st.session_state.editing_file}")
 
732
  st.session_state.viewing_prefix = None
733
  st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
734
 
 
735
  st.markdown("""
736
  <style>
737
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
 
740
  </style>
741
  """, unsafe_allow_html=True)
742
 
 
743
  if st.session_state.should_rerun:
744
  st.session_state.should_rerun = False
745
  st.rerun()
746
 
747
  if __name__ == "__main__":
748
+ main()