awacke1 committed on
Commit
572557a
·
verified ·
1 Parent(s): 359a82d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -66
app.py CHANGED
@@ -19,7 +19,6 @@ import extra_streamlit_components as stx
19
  from streamlit.runtime.scriptrunner import get_script_run_ctx
20
  import asyncio
21
  import edge_tts
22
- import moviepy.editor as mp # ๐Ÿ†• Import moviepy for MP4 generation
23
 
24
  # 🎯 1. Core Configuration & Setup
25
  st.set_page_config(
@@ -48,7 +47,7 @@ EDGE_TTS_VOICES = [
48
  "en-CA-LiamNeural"
49
  ]
50
 
51
- # Add this to your session state initialization section:
52
  if 'tts_voice' not in st.session_state:
53
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0] # Default voice
54
  if 'audio_format' not in st.session_state:
@@ -109,7 +108,7 @@ st.markdown("""
109
  FILE_EMOJIS = {
110
  "md": "๐Ÿ“",
111
  "mp3": "๐ŸŽต",
112
- "mp4": "๐ŸŽฌ" # ๐Ÿ†• Add emoji for MP4
113
  }
114
 
115
  # 🧠 5. High-Information Content Extraction
@@ -202,8 +201,8 @@ def get_download_link(file, file_type="zip"):
202
  return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“‚ Download {os.path.basename(file)}</a>'
203
  elif file_type == "mp3":
204
  return f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file)}">๐ŸŽต Download {os.path.basename(file)}</a>'
205
- elif file_type == "mp4":
206
- return f'<a href="data:video/mp4;base64,{b64}" download="{os.path.basename(file)}">๐ŸŽฌ Download {os.path.basename(file)}</a>' # ๐Ÿ†• MP4 download link
207
  elif file_type == "md":
208
  return f'<a href="data:text/markdown;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“ Download {os.path.basename(file)}</a>'
209
  else:
@@ -240,43 +239,21 @@ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=
240
  rate_str = f"{rate:+d}%"
241
  pitch_str = f"{pitch:+d}Hz"
242
  communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
243
- if file_format == "mp3":
244
- out_fn = generate_filename(text, text, "mp3")
245
- await communicate.save(out_fn)
246
- return out_fn
247
- elif file_format == "mp4":
248
- # Generate MP3 first
249
- mp3_filename = generate_filename(text, text, "mp3")
250
- await communicate.save(mp3_filename)
251
-
252
- # Create MP4 by combining MP3 with a placeholder image
253
- placeholder_image = "placeholder.jpg" # ๐Ÿ†• Ensure this image exists in your directory
254
- if not os.path.exists(placeholder_image):
255
- st.error(f"Placeholder image '{placeholder_image}' not found. Please add it to the directory.")
256
- return mp3_filename # Return MP3 if image not found
257
-
258
- video_filename = os.path.splitext(mp3_filename)[0] + ".mp4"
259
- try:
260
- audio_clip = mp.AudioFileClip(mp3_filename)
261
- image_clip = mp.ImageClip(placeholder_image).set_duration(audio_clip.duration)
262
- video_clip = image_clip.set_audio(audio_clip)
263
- video_clip.write_videofile(video_filename, codec="libx264", audio_codec="aac", verbose=False, logger=None)
264
- return video_filename
265
- except Exception as e:
266
- st.warning(f"Error generating MP4 for {mp3_filename}: {str(e)}")
267
- return mp3_filename # Return MP3 if MP4 generation fails
268
 
269
  def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
270
  """Wrapper for edge TTS generation"""
271
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
272
 
273
  def play_and_download_audio(file_path, file_type="mp3"):
274
- """Play and provide download link for audio/video"""
275
  if file_path and os.path.exists(file_path):
276
- if file_type in ["mp3", "wav"]:
 
 
277
  st.audio(file_path)
278
- elif file_type == "mp4":
279
- st.video(file_path) # ๐Ÿ†• Use st.video for MP4 files
280
  dl_link = get_download_link(file_path, file_type=file_type)
281
  st.markdown(dl_link, unsafe_allow_html=True)
282
 
@@ -299,7 +276,7 @@ def process_image(image_path, user_prompt):
299
  )
300
  return resp.choices[0].message.content
301
 
302
- def process_audio(audio_path):
303
  """Process audio with Whisper"""
304
  with open(audio_path, "rb") as f:
305
  transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
@@ -422,7 +399,7 @@ def create_paper_audio_files(papers, input_question):
422
  full_file = speak_with_edge_tts(full_text, voice=st.session_state['tts_voice'], file_format=file_format)
423
  paper['full_audio'] = full_file
424
 
425
- # Display the audio/video immediately after generation
426
  st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(full_file)}")
427
  play_and_download_audio(full_file, file_type=file_format)
428
 
@@ -432,18 +409,18 @@ def create_paper_audio_files(papers, input_question):
432
  st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
433
  paper['full_audio'] = None
434
 
435
- # After all individual audios, create a combined summary audio/video
436
  if combined_titles:
437
  combined_text = f"Here are the titles of the papers related to your query: {'; '.join(combined_titles)}. Your original question was: {input_question}"
438
  file_format = st.session_state['audio_format']
439
  combined_file = speak_with_edge_tts(combined_text, voice=st.session_state['tts_voice'], file_format=file_format)
440
- st.write(f"### {FILE_EMOJIS.get(file_format, '')} Combined Summary {'Video' if file_format=='mp4' else 'Audio'}")
441
  play_and_download_audio(combined_file, file_type=file_format)
442
  papers.append({'title': 'Combined Summary', 'full_audio': combined_file})
443
 
444
  def display_papers(papers):
445
  """
446
- Display papers with their audio/video controls using URLs as unique keys.
447
  """
448
  st.write("## Research Papers")
449
 
@@ -453,18 +430,18 @@ def display_papers(papers):
453
  st.markdown(f"*{paper['authors']}*")
454
  st.markdown(paper['summary'])
455
 
456
- # Single audio/video control for full content
457
  if paper.get('full_audio'):
458
- st.write("๐Ÿ“š Paper Audio/Video")
459
- file_ext = os.path.splitext(paper['full_audio'])[1].lower()
460
- if file_ext == ".mp3":
 
 
461
  st.audio(paper['full_audio'])
462
- elif file_ext == ".mp4":
463
- st.video(paper['full_audio'])
464
 
465
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
466
  titles_summary=True, full_audio=False):
467
- """Perform Arxiv search with audio/video generation per paper."""
468
  start = time.time()
469
 
470
  # Query the HF RAG pipeline
@@ -532,11 +509,11 @@ def process_with_claude(text):
532
  return ans
533
 
534
  # 📂 10. File Management
535
- def create_zip_of_files(md_files, mp3_files, mp4_files, input_question):
536
  """Create zip with intelligent naming based on top 10 common words."""
537
  # Exclude 'readme.md'
538
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
539
- all_files = md_files + mp3_files + mp4_files
540
  if not all_files:
541
  return None
542
 
@@ -546,12 +523,7 @@ def create_zip_of_files(md_files, mp3_files, mp4_files, input_question):
546
  if f.endswith('.md'):
547
  with open(f, 'r', encoding='utf-8') as file:
548
  all_content.append(file.read())
549
- elif f.endswith('.mp3'):
550
- # Replace underscores with spaces and extract basename without extension
551
- basename = os.path.splitext(os.path.basename(f))[0]
552
- words = basename.replace('_', ' ')
553
- all_content.append(words)
554
- elif f.endswith('.mp4'):
555
  # Replace underscores with spaces and extract basename without extension
556
  basename = os.path.splitext(os.path.basename(f))[0]
557
  words = basename.replace('_', ' ')
@@ -577,10 +549,10 @@ def load_files_for_sidebar():
577
  """Load and group files for sidebar display"""
578
  md_files = glob.glob("*.md")
579
  mp3_files = glob.glob("*.mp3")
580
- mp4_files = glob.glob("*.mp4") # ๐Ÿ†• Load MP4 files
581
 
582
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
583
- all_files = md_files + mp3_files + mp4_files
584
 
585
  groups = defaultdict(list)
586
  for f in all_files:
@@ -610,17 +582,17 @@ def display_file_manager_sidebar(groups_sorted):
610
 
611
  all_md = []
612
  all_mp3 = []
613
- all_mp4 = [] # ๐Ÿ†• List to hold MP4 files
614
  for group_name, files in groups_sorted:
615
  for f in files:
616
  if f.endswith(".md"):
617
  all_md.append(f)
618
  elif f.endswith(".mp3"):
619
  all_mp3.append(f)
620
- elif f.endswith(".mp4"):
621
- all_mp4.append(f) # ๐Ÿ†• Append MP4 files
622
 
623
- top_bar = st.sidebar.columns(4) # ๐Ÿ†• Adjusted columns to accommodate MP4
624
  with top_bar[0]:
625
  if st.button("๐Ÿ—‘ DelAllMD"):
626
  for f in all_md:
@@ -632,13 +604,13 @@ def display_file_manager_sidebar(groups_sorted):
632
  os.remove(f)
633
  st.session_state.should_rerun = True
634
  with top_bar[2]:
635
- if st.button("๐Ÿ—‘ DelAllMP4"):
636
- for f in all_mp4:
637
  os.remove(f)
638
  st.session_state.should_rerun = True
639
  with top_bar[3]:
640
  if st.button("โฌ‡๏ธ ZipAll"):
641
- zip_name = create_zip_of_files(all_md, all_mp3, all_mp4, input_question=st.session_state.get('last_query', ''))
642
  if zip_name:
643
  st.sidebar.markdown(get_download_link(zip_name, file_type="zip"), unsafe_allow_html=True)
644
 
@@ -677,7 +649,7 @@ def main():
677
  st.sidebar.markdown("### ๐Ÿ”Š Audio Format")
678
  selected_format = st.sidebar.radio(
679
  "Choose Audio Format:",
680
- options=["MP3", "MP4"],
681
  index=0 # Default to MP3
682
  )
683
 
@@ -838,8 +810,8 @@ def main():
838
  st.markdown(content)
839
  elif ext == "mp3":
840
  st.audio(f)
841
- elif ext == "mp4":
842
- st.video(f) # ๐Ÿ†• Handle MP4 files
843
  else:
844
  st.markdown(get_download_link(f), unsafe_allow_html=True)
845
  break
 
19
  from streamlit.runtime.scriptrunner import get_script_run_ctx
20
  import asyncio
21
  import edge_tts
 
22
 
23
  # 🎯 1. Core Configuration & Setup
24
  st.set_page_config(
 
47
  "en-CA-LiamNeural"
48
  ]
49
 
50
+ # Initialize session state variables
51
  if 'tts_voice' not in st.session_state:
52
  st.session_state['tts_voice'] = EDGE_TTS_VOICES[0] # Default voice
53
  if 'audio_format' not in st.session_state:
 
108
  FILE_EMOJIS = {
109
  "md": "๐Ÿ“",
110
  "mp3": "๐ŸŽต",
111
+ "wav": "๐Ÿ”Š" # ๐Ÿ†• Add emoji for WAV
112
  }
113
 
114
  # 🧠 5. High-Information Content Extraction
 
201
  return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“‚ Download {os.path.basename(file)}</a>'
202
  elif file_type == "mp3":
203
  return f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file)}">๐ŸŽต Download {os.path.basename(file)}</a>'
204
+ elif file_type == "wav":
205
+ return f'<a href="data:audio/wav;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ”Š Download {os.path.basename(file)}</a>' # ๐Ÿ†• WAV download link
206
  elif file_type == "md":
207
  return f'<a href="data:text/markdown;base64,{b64}" download="{os.path.basename(file)}">๐Ÿ“ Download {os.path.basename(file)}</a>'
208
  else:
 
239
  rate_str = f"{rate:+d}%"
240
  pitch_str = f"{pitch:+d}Hz"
241
  communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
242
+ out_fn = generate_filename(text, text, file_type=file_format)
243
+ await communicate.save(out_fn)
244
+ return out_fn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
247
  """Wrapper for edge TTS generation"""
248
  return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
249
 
250
  def play_and_download_audio(file_path, file_type="mp3"):
251
+ """Play and provide download link for audio"""
252
  if file_path and os.path.exists(file_path):
253
+ if file_type == "mp3":
254
+ st.audio(file_path)
255
+ elif file_type == "wav":
256
  st.audio(file_path)
 
 
257
  dl_link = get_download_link(file_path, file_type=file_type)
258
  st.markdown(dl_link, unsafe_allow_html=True)
259
 
 
276
  )
277
  return resp.choices[0].message.content
278
 
279
+ def process_audio_file(audio_path):
280
  """Process audio with Whisper"""
281
  with open(audio_path, "rb") as f:
282
  transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
 
399
  full_file = speak_with_edge_tts(full_text, voice=st.session_state['tts_voice'], file_format=file_format)
400
  paper['full_audio'] = full_file
401
 
402
+ # Display the audio immediately after generation
403
  st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(full_file)}")
404
  play_and_download_audio(full_file, file_type=file_format)
405
 
 
409
  st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}")
410
  paper['full_audio'] = None
411
 
412
+ # After all individual audios, create a combined summary audio
413
  if combined_titles:
414
  combined_text = f"Here are the titles of the papers related to your query: {'; '.join(combined_titles)}. Your original question was: {input_question}"
415
  file_format = st.session_state['audio_format']
416
  combined_file = speak_with_edge_tts(combined_text, voice=st.session_state['tts_voice'], file_format=file_format)
417
+ st.write(f"### {FILE_EMOJIS.get(file_format, '')} Combined Summary Audio")
418
  play_and_download_audio(combined_file, file_type=file_format)
419
  papers.append({'title': 'Combined Summary', 'full_audio': combined_file})
420
 
421
  def display_papers(papers):
422
  """
423
+ Display papers with their audio controls using URLs as unique keys.
424
  """
425
  st.write("## Research Papers")
426
 
 
430
  st.markdown(f"*{paper['authors']}*")
431
  st.markdown(paper['summary'])
432
 
433
+ # Single audio control for full content
434
  if paper.get('full_audio'):
435
+ st.write("๐Ÿ“š Paper Audio")
436
+ file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
437
+ if file_ext == "mp3":
438
+ st.audio(paper['full_audio'])
439
+ elif file_ext == "wav":
440
  st.audio(paper['full_audio'])
 
 
441
 
442
  def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
443
  titles_summary=True, full_audio=False):
444
+ """Perform Arxiv search with audio generation per paper."""
445
  start = time.time()
446
 
447
  # Query the HF RAG pipeline
 
509
  return ans
510
 
511
  # 📂 10. File Management
512
+ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
513
  """Create zip with intelligent naming based on top 10 common words."""
514
  # Exclude 'readme.md'
515
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
516
+ all_files = md_files + mp3_files + wav_files
517
  if not all_files:
518
  return None
519
 
 
523
  if f.endswith('.md'):
524
  with open(f, 'r', encoding='utf-8') as file:
525
  all_content.append(file.read())
526
+ elif f.endswith('.mp3') or f.endswith('.wav'):
 
 
 
 
 
527
  # Replace underscores with spaces and extract basename without extension
528
  basename = os.path.splitext(os.path.basename(f))[0]
529
  words = basename.replace('_', ' ')
 
549
  """Load and group files for sidebar display"""
550
  md_files = glob.glob("*.md")
551
  mp3_files = glob.glob("*.mp3")
552
+ wav_files = glob.glob("*.wav") # ๐Ÿ†• Load WAV files
553
 
554
  md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
555
+ all_files = md_files + mp3_files + wav_files
556
 
557
  groups = defaultdict(list)
558
  for f in all_files:
 
582
 
583
  all_md = []
584
  all_mp3 = []
585
+ all_wav = [] # ๐Ÿ†• List to hold WAV files
586
  for group_name, files in groups_sorted:
587
  for f in files:
588
  if f.endswith(".md"):
589
  all_md.append(f)
590
  elif f.endswith(".mp3"):
591
  all_mp3.append(f)
592
+ elif f.endswith(".wav"):
593
+ all_wav.append(f) # ๐Ÿ†• Append WAV files
594
 
595
+ top_bar = st.sidebar.columns(4) # ๐Ÿ†• Adjusted columns to accommodate WAV
596
  with top_bar[0]:
597
  if st.button("๐Ÿ—‘ DelAllMD"):
598
  for f in all_md:
 
604
  os.remove(f)
605
  st.session_state.should_rerun = True
606
  with top_bar[2]:
607
+ if st.button("๐Ÿ—‘ DelAllWAV"):
608
+ for f in all_wav:
609
  os.remove(f)
610
  st.session_state.should_rerun = True
611
  with top_bar[3]:
612
  if st.button("โฌ‡๏ธ ZipAll"):
613
+ zip_name = create_zip_of_files(all_md, all_mp3, all_wav, input_question=st.session_state.get('last_query', ''))
614
  if zip_name:
615
  st.sidebar.markdown(get_download_link(zip_name, file_type="zip"), unsafe_allow_html=True)
616
 
 
649
  st.sidebar.markdown("### ๐Ÿ”Š Audio Format")
650
  selected_format = st.sidebar.radio(
651
  "Choose Audio Format:",
652
+ options=["MP3", "WAV"],
653
  index=0 # Default to MP3
654
  )
655
 
 
810
  st.markdown(content)
811
  elif ext == "mp3":
812
  st.audio(f)
813
+ elif ext == "wav":
814
+ st.audio(f) # ๐Ÿ†• Handle WAV files
815
  else:
816
  st.markdown(get_download_link(f), unsafe_allow_html=True)
817
  break