awacke1 committed
Commit 8c120d1 · verified
Parent(s): ffd10f7

Update app.py

Files changed (1)
  1. app.py +320 -430

app.py CHANGED
@@ -20,29 +20,46 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
 import asyncio
 import edge_tts
 
-# 1. Core Configuration & Setup
+# Available English voices
+ENGLISH_VOICES = [
+    "en-US-AriaNeural",     # Female, conversational
+    "en-US-JennyNeural",    # Female, customer service
+    "en-US-GuyNeural",      # Male, newscast
+    "en-US-RogerNeural",    # Male, calm
+    "en-GB-SoniaNeural",    # British female
+    "en-GB-RyanNeural",     # British male
+    "en-AU-NatashaNeural",  # Australian female
+    "en-AU-WilliamNeural",  # Australian male
+    "en-CA-ClaraNeural",    # Canadian female
+    "en-CA-LiamNeural",     # Canadian male
+    "en-IE-EmilyNeural",    # Irish female
+    "en-IE-ConnorNeural",   # Irish male
+    "en-IN-NeerjaNeural",   # Indian female
+    "en-IN-PrabhatNeural",  # Indian male
+]
+
+# Core Configuration & Setup
 st.set_page_config(
-    page_title="🚲BikeAI🏆 Research Assistant Pro",
-    page_icon="🚲🏆",
+    page_title="ARIA Research Assistant",
+    page_icon="🔬",
     layout="wide",
     initial_sidebar_state="auto",
     menu_items={
         'Get Help': 'https://huggingface.co/awacke1',
         'Report a bug': 'https://huggingface.co/spaces/awacke1',
-        'About': "Research Assistant Pro with Voice Search"
+        'About': "ARIA: Academic Research Interactive Assistant"
     }
 )
 load_dotenv()
 
-# 2. API Setup & Clients
+# API Setup
 openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ''))
 anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', st.secrets.get('ANTHROPIC_API_KEY', ''))
-hf_key = os.getenv('HF_KEY', st.secrets.get('HF_KEY', ''))
 
 openai_client = OpenAI(api_key=openai_api_key)
 claude_client = anthropic.Anthropic(api_key=anthropic_key)
 
-# 3. Session State Management
+# Session State Management
 if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'chat_history' not in st.session_state:
@@ -53,24 +70,30 @@ if 'messages' not in st.session_state:
     st.session_state['messages'] = []
 if 'last_voice_input' not in st.session_state:
     st.session_state['last_voice_input'] = ""
-if 'editing_file' not in st.session_state:
-    st.session_state['editing_file'] = None
 if 'current_audio' not in st.session_state:
     st.session_state['current_audio'] = None
 if 'autoplay_audio' not in st.session_state:
     st.session_state['autoplay_audio'] = True
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
-if 'old_val' not in st.session_state:
-    st.session_state['old_val'] = None
+if 'autorun' not in st.session_state:
+    st.session_state.autorun = True
+if 'run_option' not in st.session_state:
+    st.session_state.run_option = "Arxiv"
+if 'last_processed_text' not in st.session_state:
+    st.session_state.last_processed_text = ""
 
-# 4. Style Definitions
+# Custom CSS
 st.markdown("""
 <style>
-.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
-.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
+.main {
+    background: linear-gradient(135deg, #1a1a1a, #2d2d2d);
+    color: #ffffff;
+}
+.stMarkdown {
+    font-family: 'Helvetica Neue', sans-serif;
+}
 .stButton>button {
-    margin-right: 0.5rem;
     background-color: #4CAF50;
     color: white;
     padding: 0.5rem 1rem;
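Note: the run of `if ... not in st.session_state` guards above (in both versions) could be collapsed into one table-driven loop. A minimal sketch; the `SESSION_DEFAULTS` table and helper name are illustrative, not part of this commit:

import streamlit as st

SESSION_DEFAULTS = {
    'transcript_history': [],
    'chat_history': [],
    'messages': [],
    'last_voice_input': "",
    'current_audio': None,
    'autoplay_audio': True,
    'should_rerun': False,
    'autorun': True,
    'run_option': "Arxiv",
    'last_processed_text': "",
}

def init_session_state(defaults=SESSION_DEFAULTS):
    # st.session_state behaves like a dict, so setdefault only fills missing keys.
    for key, value in defaults.items():
        st.session_state.setdefault(key, value)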
@@ -85,27 +108,37 @@ st.markdown("""
     margin: 1rem 0;
     padding: 1rem;
     border-radius: 10px;
-    background: white;
+    background: #f5f5f5;
     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 }
-.file-manager {
+.voice-container {
     padding: 1rem;
     background: white;
     border-radius: 10px;
     margin: 1rem 0;
 }
+.text-display {
+    margin: 1rem 0;
+    padding: 1rem;
+    background: #f9f9f9;
+    border-radius: 5px;
+    font-size: 1.1em;
+}
+.model-selector {
+    margin: 1rem 0;
+    padding: 0.5rem;
+    background: #ffffff;
+    border-radius: 5px;
+}
+.response-container {
+    margin-top: 2rem;
+    padding: 1rem;
+    background: rgba(255, 255, 255, 0.05);
+    border-radius: 10px;
+}
 </style>
 """, unsafe_allow_html=True)
 
-FILE_EMOJIS = {
-    "md": "📝",
-    "mp3": "🎵",
-    "mp4": "🎥",
-    "png": "🖼️",
-    "jpg": "📸"
-}
-
-# 5. Voice Recognition Component
 def create_voice_component():
     """Create auto-searching voice recognition component"""
     return components.html(
@@ -126,9 +159,8 @@ def create_voice_component():
         const output = document.getElementById('output');
         let fullTranscript = '';
         let lastPauseTime = Date.now();
-        let pauseThreshold = 1500; // Time in ms to wait before triggering search
+        let pauseThreshold = 1500;
 
-        // Auto-start on load
         window.addEventListener('load', () => {
             setTimeout(() => {
                 try {
@@ -160,10 +192,12 @@ def create_voice_component():
                 interim.textContent = '';
                 output.textContent = fullTranscript;
 
-                // Send to Streamlit for processing
                 window.parent.postMessage({
                     type: 'streamlit:setComponentValue',
-                    value: fullTranscript,
+                    value: {
+                        text: fullTranscript,
+                        trigger: 'speech'
+                    },
                     dataType: 'json',
                 }, '*');
             } else if (interimTranscript) {
@@ -173,7 +207,6 @@ def create_voice_component():
             output.scrollTop = output.scrollHeight;
         };
 
-        // Check for pauses and trigger search
         setInterval(() => {
             if (fullTranscript && Date.now() - lastPauseTime > pauseThreshold) {
                 if (output.dataset.lastProcessed !== fullTranscript) {
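Note: after this change the component posts a JSON object through `streamlit:setComponentValue` instead of a bare string, so the Python side has to accept both shapes. A small normalizing sketch matching the handling in `render_search_interface` below; the helper name is hypothetical:

def normalize_component_value(voice_result):
    # New payload: {'text': ..., 'trigger': e.g. 'speech' or 'pause'}; old payload: plain string.
    if isinstance(voice_result, dict):
        return voice_result.get('text', ''), voice_result.get('trigger')
    if isinstance(voice_result, str):
        return voice_result, None
    return '', None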
@@ -213,127 +246,31 @@ def create_voice_component():
         height=200
     )
 
-# Available English voices
-ENGLISH_VOICES = [
-    "en-US-AriaNeural",     # Female, conversational
-    "en-US-JennyNeural",    # Female, customer service
-    "en-US-GuyNeural",      # Male, newscast
-    "en-US-RogerNeural",    # Male, calm
-    "en-GB-SoniaNeural",    # British female
-    "en-GB-RyanNeural",     # British male
-    "en-AU-NatashaNeural",  # Australian female
-    "en-AU-WilliamNeural",  # Australian male
-    "en-CA-ClaraNeural",    # Canadian female
-    "en-CA-LiamNeural",     # Canadian male
-    "en-IE-EmilyNeural",    # Irish female
-    "en-IE-ConnorNeural",   # Irish male
-    "en-IN-NeerjaNeural",   # Indian female
-    "en-IN-PrabhatNeural",  # Indian male
-]
-
-def render_search_interface():
-    """Render main search interface with auto-search voice component"""
-    st.header("🔍 Voice Search")
-
-    # Voice settings
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        selected_voice = st.selectbox(
-            "Select Voice",
-            ENGLISH_VOICES,
-            index=0,
-            help="Choose the voice for audio responses"
-        )
-    with col2:
-        auto_search = st.checkbox("Auto-Search on Pause", value=True)
-
-    # Voice component
-    voice_result = create_voice_component()
-
-    # Handle voice input
-    if voice_result and isinstance(voice_result, (str, dict)):
-        # Extract text and trigger info
-        if isinstance(voice_result, dict):
-            current_text = voice_result.get('text', '')
-            trigger = voice_result.get('trigger')
-        else:
-            current_text = voice_result
-            trigger = None
-
-        # Process on pause trigger if enabled
-        if auto_search and trigger == 'pause' and current_text:
-            if current_text != st.session_state.get('last_processed_text', ''):
-                st.session_state.last_processed_text = current_text
-
-                # Show the detected text
-                st.info(f"🎤 Detected: {current_text}")
-
-                # Perform search
-                try:
-                    with st.spinner("Searching and generating audio response..."):
-                        response, audio_file = asyncio.run(
-                            process_voice_search(
-                                current_text,
-                                voice=selected_voice
-                            )
-                        )
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file, "Search Results")
-
-                    # Save to history
-                    st.session_state.transcript_history.append({
-                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        'query': current_text,
-                        'response': response,
-                        'audio': audio_file
-                    })
-                except Exception as e:
-                    st.error(f"Error processing search: {str(e)}")
-
-    # Manual search option
-    with st.expander("📝 Manual Search", expanded=False):
-        query = st.text_input("Search Query:", value=st.session_state.get('last_processed_text', ''))
-        if st.button("🔍 Search"):
-            try:
-                with st.spinner("Searching and generating audio..."):
-                    response, audio_file = asyncio.run(
-                        process_voice_search(
-                            query,
-                            voice=selected_voice
-                        )
-                    )
-                if response:
-                    st.markdown(response)
-                if audio_file:
-                    render_audio_result(audio_file)
-            except Exception as e:
-                st.error(f"Error processing search: {str(e)}")
-
-# 6. Audio Processing Functions
-def get_autoplay_audio_html(audio_path, width="100%"):
-    """Create HTML for autoplaying audio with controls"""
+def get_audio_autoplay_html(audio_path):
+    """Create HTML for autoplaying audio with controls and download"""
     try:
         with open(audio_path, "rb") as audio_file:
             audio_bytes = audio_file.read()
         audio_b64 = base64.b64encode(audio_bytes).decode()
         return f'''
-        <audio controls autoplay style="width: {width};">
-            <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
-            Your browser does not support the audio element.
-        </audio>
-        <div style="margin-top: 5px;">
-            <a href="data:audio/mpeg;base64,{audio_b64}"
-               download="{os.path.basename(audio_path)}"
-               style="text-decoration: none;">
-               ⬇️ Download Audio
-            </a>
+        <div class="audio-player">
+            <audio controls autoplay style="width: 100%;">
+                <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
+                Your browser does not support the audio element.
+            </audio>
+            <div style="margin-top: 5px;">
+                <a href="data:audio/mpeg;base64,{audio_b64}"
+                   download="{os.path.basename(audio_path)}"
+                   style="text-decoration: none; color: #4CAF50;">
+                   ⬇️ Download Audio
+                </a>
+            </div>
         </div>
         '''
     except Exception as e:
         return f"Error loading audio: {str(e)}"
 
+# Audio Processing Functions
 def clean_for_speech(text: str) -> str:
     """Clean text for speech synthesis"""
     text = text.replace("\n", " ")
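Note: both the old and new audio helpers embed the MP3 bytes as a base64 data URI, so a single string drives both the `<audio>` source and the download link with no extra request. The same idea reduced to its core; the function name is illustrative:

import base64

def audio_data_uri(path):
    # One read of the file serves both playback and download.
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f"data:audio/mpeg;base64,{b64}"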
@@ -344,7 +281,7 @@ def clean_for_speech(text: str) -> str:
     return text
 
 async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
-    """Generate audio using Edge TTS"""
+    """Generate audio using Edge TTS with automatic playback"""
     text = clean_for_speech(text)
     if not text.strip():
         return None
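Note: the diff elides the body of `generate_audio` after the empty-text guard. With the edge_tts package, the synthesis step typically looks like the sketch below; the output filename scheme is an assumption:

import asyncio
from datetime import datetime
import edge_tts

async def generate_audio_sketch(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    # edge_tts.Communicate takes rate/pitch as signed strings like "+10%" / "-5Hz".
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    out_file = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"  # assumed naming
    await communicate.save(out_file)
    return out_file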
@@ -361,38 +298,24 @@ def render_audio_result(audio_file, title="Generated Audio"):
     """Render audio result with autoplay in Streamlit"""
     if audio_file and os.path.exists(audio_file):
         st.markdown(f"### {title}")
-        st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
-
-# 7. File Operations
-def generate_filename(text, response="", file_type="md"):
-    """Generate intelligent filename"""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    safe_text = re.sub(r'[^\w\s-]', '', text[:50])
-    return f"{timestamp}_{safe_text}.{file_type}"
-
-def create_file(text, response, file_type="md"):
-    """Create file with content"""
-    filename = generate_filename(text, response, file_type)
-    with open(filename, 'w', encoding='utf-8') as f:
-        f.write(f"{text}\n\n{response}")
-    return filename
-
-def get_download_link(file_path):
-    """Generate download link for file"""
-    with open(file_path, "rb") as file:
-        contents = file.read()
-    b64 = base64.b64encode(contents).decode()
-    file_name = os.path.basename(file_path)
-    return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">⬇️ Download {file_name}</a>'
-
-# 8. Search and Process Functions
-def perform_arxiv_search(query, response_type="summary"):
-    """Enhanced Arxiv search with voice response"""
+        st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True)
+
+async def process_voice_search(query, voice="en-US-AriaNeural"):
+    """Process voice search with automatic audio using selected voice"""
+    response, refs = perform_arxiv_search(query)
+
+    audio_file = await generate_audio(response, voice=voice)
+    st.session_state.current_audio = audio_file
+
+    return response, audio_file
+
+# Arxiv Search Functions
+def perform_arxiv_search(query):
+    """Enhanced Arxiv search with summary"""
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
 
-    # Get search results and AI interpretation
     refs = client.predict(
         query, 20, "Semantic Search",
         "mistralai/Mixtral-8x7B-Instruct-v0.1",
         api_name="/update_with_rag_md"
     )[0]
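Note: both versions drive the Arxiv RAG Space through gradio_client: the positional arguments line up with the endpoint's inputs and `api_name` selects the endpoint. A standalone sketch of the same call; the query string is illustrative:

from gradio_client import Client

client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
refs = client.predict(
    "transformer architectures",             # search query (illustrative)
    20,                                      # number of results
    "Semantic Search",                       # search mode
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # model used for the RAG summary
    api_name="/update_with_rag_md"
)[0]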
@@ -404,279 +327,237 @@ def perform_arxiv_search(query, response_type="summary"):
         api_name="/ask_llm"
     )
 
-    # Format response
-    response = f"### 🔎 Search Results for: {query}\n\n{summary}\n\n### 📚 References\n\n{refs}"
-
+    response = f"### Search Results for: {query}\n\n{summary}\n\n### References\n\n{refs}"
     return response, refs
 
-async def process_voice_search(query):
-    """Process voice search with automatic audio"""
-    response, refs = perform_arxiv_search(query)
-
-    # Generate audio from response
-    audio_file = await generate_audio(response)
-
-    # Update state
-    st.session_state.current_audio = audio_file
-
-    return response, audio_file
-
-def process_with_gpt(text):
-    """Process text with GPT-4"""
-    if not text:
-        return
-
-    st.session_state.messages.append({"role": "user", "content": text})
-
-    with st.chat_message("user"):
-        st.markdown(text)
-
-    with st.chat_message("assistant"):
-        response = openai_client.chat.completions.create(
-            model=st.session_state.openai_model,
-            messages=st.session_state.messages,
-            stream=False
-        )
-
-        answer = response.choices[0].message.content
-        st.write(f"GPT-4: {answer}")
-
-        # Generate audio response
-        audio_file = asyncio.run(generate_audio(answer))
-        if audio_file:
-            render_audio_result(audio_file, "GPT-4 Response")
-
-        # Save response
-        create_file(text, answer, "md")
-        st.session_state.messages.append({"role": "assistant", "content": answer})
-
-    return answer
-
-def process_with_claude(text):
-    """Process text with Claude"""
-    if not text:
-        return
-
-    with st.chat_message("user"):
-        st.markdown(text)
-
-    with st.chat_message("assistant"):
-        response = claude_client.messages.create(
-            model="claude-3-sonnet-20240229",
-            max_tokens=1000,
-            messages=[{"role": "user", "content": text}]
-        )
-
-        answer = response.content[0].text
-        st.write(f"Claude-3: {answer}")
-
-        # Generate audio response
-        audio_file = asyncio.run(generate_audio(answer))
-        if audio_file:
-            render_audio_result(audio_file, "Claude Response")
-
-        # Save response
-        create_file(text, answer, "md")
-        st.session_state.chat_history.append({"user": text, "claude": answer})
-
-    return answer
-
-# 9. UI Components
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True,
+                      full_audio=False, voice="en-US-AriaNeural"):
+    """Full Arxiv search with audio summaries"""
+    start = time.time()
+    response, refs = perform_arxiv_search(q)
+
+    st.markdown(response)
+
+    # Generate audio responses
+    if full_audio:
+        audio_file = asyncio.run(generate_audio(response, voice=voice))
+        if audio_file:
+            render_audio_result(audio_file, "Complete Response")
+
+    if vocal_summary:
+        summary_audio = asyncio.run(generate_audio(
+            f"Summary of results for query: {q}",
+            voice=voice
+        ))
+        if summary_audio:
+            render_audio_result(summary_audio, "Summary")
+
+    elapsed = time.time() - start
+    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+
+    return response
+
 def render_search_interface():
-    """Render main search interface with voice component"""
-    st.header("🔍 Voice Search")
-
-    # Voice component with autorun
-    voice_text = create_voice_component()
-
-    # Handle voice input
-    if voice_text and isinstance(voice_text, (str, dict)):
-        # Convert dict to string if necessary
-        current_text = voice_text if isinstance(voice_text, str) else voice_text.get('value', '')
-
-        # Compare with last processed text
-        if current_text and current_text != st.session_state.get('last_voice_text', ''):
-            st.session_state.last_voice_text = current_text
-
-            # Clean the text
-            cleaned_text = current_text.replace('\n', ' ').strip()
-
-            # Process with selected model
-            if st.session_state.autoplay_audio and cleaned_text:
-                try:
-                    response, audio_file = asyncio.run(process_voice_search(cleaned_text))
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file, "Search Results")
-                except Exception as e:
-                    st.error(f"Error processing voice search: {str(e)}")
-
-    # Manual search option
-    with st.expander("📝 Manual Search", expanded=False):
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            query = st.text_input("Enter search query:")
-        with col2:
-            if st.button("🔍 Search"):
-                try:
-                    response, audio_file = asyncio.run(process_voice_search(query))
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file)
-                except Exception as e:
-                    st.error(f"Error processing search: {str(e)}")
-
-def display_file_manager():
-    """Display file manager with media preview"""
-    st.sidebar.title("📁 File Manager")
-
-    files = {
-        'Documents': glob.glob("*.md"),
-        'Audio': glob.glob("*.mp3"),
-        'Video': glob.glob("*.mp4"),
-        'Images': glob.glob("*.png") + glob.glob("*.jpg")
-    }
-
-    # Top actions
-    col1, col2 = st.sidebar.columns(2)
+    """Main search interface with voice recognition and model selection"""
+    st.header("🔍 Voice Search & Research")
+
+    # Voice and model settings
+    col1, col2, col3 = st.columns([2, 1, 1])
     with col1:
-        if st.button("🗑 Delete All"):
-            for category in files.values():
-                for file in category:
-                    os.remove(file)
-            st.rerun()
-
+        selected_voice = st.selectbox(
+            "Select Voice",
+            ENGLISH_VOICES,
+            index=0,
+            help="Choose the voice for audio responses"
+        )
     with col2:
-        if st.button("⬇️ Download All"):
-            zip_name = f"archive_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
-            with zipfile.ZipFile(zip_name, 'w') as zipf:
-                for category in files.values():
-                    for file in category:
-                        zipf.write(file)
-            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
-
-    # Display files by category
-    for category, category_files in files.items():
-        if category_files:
-            with st.sidebar.expander(f"{FILE_EMOJIS.get(category.lower(), '📄')} {category} ({len(category_files)})", expanded=True):
-                for file in sorted(category_files, key=os.path.getmtime, reverse=True):
-                    col1, col2, col3 = st.columns([3, 1, 1])
-                    with col1:
-                        st.markdown(f"**{os.path.basename(file)}**")
-                    with col2:
-                        st.markdown(get_download_link(file), unsafe_allow_html=True)
-                    with col3:
-                        if st.button("🗑", key=f"del_{file}"):
-                            os.remove(file)
-                            st.rerun()
-
-def display_media_gallery():
-    """Display media files in gallery format"""
-    media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
-
-    with media_tabs[0]:
-        audio_files = glob.glob("*.mp3")
-        if audio_files:
-            for audio_file in audio_files:
-                st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
+        run_option = st.selectbox(
+            "Model:",
+            ["Arxiv", "GPT-4o", "Claude-3.5"],
+            key="run_option"
+        )
+    with col3:
+        autorun = st.checkbox("⚙ AutoRun", value=True, key="autorun")
+
+    # Voice component
+    voice_result = create_voice_component()
+
+    # Handle voice input with autorun
+    if voice_result and isinstance(voice_result, (str, dict)):
+        # Extract text and trigger info
+        if isinstance(voice_result, dict):
+            current_text = voice_result.get('text', '')
+            trigger = voice_result.get('trigger')
         else:
-            st.write("No audio files found")
-
-    with media_tabs[1]:
-        video_files = glob.glob("*.mp4")
-        if video_files:
-            cols = st.columns(2)
-            for idx, video_file in enumerate(video_files):
-                with cols[idx % 2]:
-                    st.video(video_file)
-        else:
-            st.write("No video files found")
-
-    with media_tabs[2]:
-        image_files = glob.glob("*.png") + glob.glob("*.jpg")
-        if image_files:
-            cols = st.columns(3)
-            for idx, image_file in enumerate(image_files):
-                with cols[idx % 3]:
-                    st.image(Image.open(image_file), use_column_width=True)
-                    if st.button(f"Analyze {os.path.basename(image_file)}", key=f"analyze_{image_file}"):
-                        with st.spinner("Analyzing image..."):
-                            analysis = process_with_gpt(f"Analyze this image: {image_file}")
-                            st.markdown(analysis)
-        else:
-            st.write("No images found")
-
-def display_search_history():
-    """Display search history with audio playback"""
-    st.header("Search History")
-
-    history_tabs = st.tabs(["🔍 Voice Searches", "💬 Chat History"])
-
-    with history_tabs[0]:
-        for entry in reversed(st.session_state.transcript_history):
-            with st.expander(f"🔍 {entry['timestamp']} - {entry['query'][:50]}...", expanded=False):
-                st.markdown(entry['response'])
-                if entry.get('audio'):
-                    render_audio_result(entry['audio'], "Recorded Response")
-
-    with history_tabs[1]:
-        chat_tabs = st.tabs(["Claude History", "GPT-4 History"])
-        with chat_tabs[0]:
-            for chat in st.session_state.chat_history:
-                st.markdown(f"**You:** {chat['user']}")
-                st.markdown(f"**Claude:** {chat['claude']}")
-                st.markdown("---")
-        with chat_tabs[1]:
-            for msg in st.session_state.messages:
-                with st.chat_message(msg["role"]):
-                    st.markdown(msg["content"])
-
-# Main Application
+            current_text = voice_result
+            trigger = None
+
+        # Show text in edit box
+        edited_input = st.text_area(
+            "✏️ Edit Input:",
+            value=current_text,
+            height=100,
+            key="edited_input"
+        )
+
+        # Check if input has changed
+        input_changed = (edited_input != st.session_state.get('last_processed_text', ''))
+
+        # Process based on autorun and model selection
+        if autorun and input_changed and edited_input:
+            st.session_state.last_processed_text = edited_input
+
+            try:
+                with st.spinner("Processing..."):
+                    if run_option == "Arxiv":
+                        result = perform_ai_lookup(
+                            edited_input,
+                            vocal_summary=True,
+                            extended_refs=False,
+                            titles_summary=True,
+                            full_audio=True,
+                            voice=selected_voice
+                        )
+                    elif run_option == "GPT-4o":
+                        result = process_with_gpt(edited_input)
+                        # Generate audio for GPT response
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "GPT-4 Response")
+                    elif run_option == "Claude-3.5":
+                        result = process_with_claude(edited_input)
+                        # Generate audio for Claude response
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "Claude Response")
+
+                    # Save to history
+                    st.session_state.transcript_history.append({
+                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        'query': edited_input,
+                        'response': result,
+                        'model': run_option
+                    })
+
+            except Exception as e:
+                st.error(f"Error processing request: {str(e)}")
+
+        # Manual run button
+        elif st.button("▶ Run"):
+            try:
+                with st.spinner("Processing..."):
+                    if run_option == "Arxiv":
+                        result = perform_ai_lookup(
+                            edited_input,
+                            vocal_summary=True,
+                            extended_refs=False,
+                            titles_summary=True,
+                            full_audio=True,
+                            voice=selected_voice
+                        )
+                    elif run_option == "GPT-4o":
+                        result = process_with_gpt(edited_input)
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "GPT-4 Response")
+                    elif run_option == "Claude-3.5":
+                        result = process_with_claude(edited_input)
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "Claude Response")
+
+                    # Save to history
+                    st.session_state.transcript_history.append({
+                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        'query': edited_input,
+                        'response': result,
+                        'model': run_option
+                    })
+
+            except Exception as e:
+                st.error(f"Error processing request: {str(e)}")
+
 def main():
-    st.title("🔬 Research Assistant Pro")
-
-    # Initialize autorun setting
-    if 'autorun' not in st.session_state:
-        st.session_state.autorun = True
-
-    # Settings sidebar
+    st.title("🔬 ARIA Research Assistant")
+
+    # Initialize settings
     with st.sidebar:
         st.title("⚙️ Settings")
-        st.session_state.autorun = st.checkbox("Enable Autorun", value=True)
-
-        st.subheader("Voice Settings")
-        voice_options = [
-            "en-US-AriaNeural",
-            "en-US-GuyNeural",
-            "en-GB-SoniaNeural",
-            "en-AU-NatashaNeural"
-        ]
-        selected_voice = st.selectbox("Select Voice", voice_options)
-
+
+        # Audio Settings
         st.subheader("Audio Settings")
-        rate = st.slider("Speech Rate", -50, 50, 0, 5)
-        pitch = st.slider("Pitch", -50, 50, 0, 5)
-
         st.session_state.autoplay_audio = st.checkbox(
             "Autoplay Audio",
             value=True,
             help="Automatically play audio when generated"
         )
+
+        rate = st.slider("Speech Rate", -50, 50, 0, 5)
+        pitch = st.slider("Pitch", -50, 50, 0, 5)
+
+        # Advanced Settings
+        st.subheader("Advanced")
+        save_history = st.checkbox(
+            "Save History",
+            value=True,
+            help="Save transcripts and responses"
+        )
+        cleanup_old = st.checkbox(
+            "Auto Cleanup",
+            value=False,
+            help="Remove old files automatically"
+        )
 
     # Main content tabs
     tabs = st.tabs(["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Advanced"])
 
     with tabs[0]:
         render_search_interface()
 
     with tabs[1]:
-        display_search_history()
-
+        st.header("Search History")
+        if st.session_state.transcript_history:
+            for entry in reversed(st.session_state.transcript_history):
+                with st.expander(
+                    f"🔍 {entry['timestamp']} - {entry['query'][:50]}...",
+                    expanded=False
+                ):
+                    st.markdown(f"**Model:** {entry['model']}")
+                    st.markdown(entry['response'])
+
     with tabs[2]:
-        display_media_gallery()
+        st.header("Media Files")
+        media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
+
+        with media_tabs[0]:
+            audio_files = glob.glob("*.mp3")
+            if audio_files:
+                for audio_file in sorted(audio_files, key=os.path.getmtime, reverse=True):
+                    st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True)
+            else:
+                st.write("No audio files found")
+
+        with media_tabs[1]:
+            video_files = glob.glob("*.mp4")
+            if video_files:
+                cols = st.columns(2)
+                for idx, video_file in enumerate(video_files):
+                    with cols[idx % 2]:
+                        st.video(video_file)
+            else:
+                st.write("No video files found")
+
+        with media_tabs[2]:
+            image_files = glob.glob("*.png") + glob.glob("*.jpg")
+            if image_files:
+                cols = st.columns(3)
+                for idx, image_file in enumerate(image_files):
+                    with cols[idx % 3]:
+                        st.image(Image.open(image_file), use_column_width=True)
+            else:
+                st.write("No images found")
 
     with tabs[3]:
         st.header("Advanced Settings")
@@ -684,41 +565,50 @@ def main():
         with col1:
             st.subheader("Model Settings")
             st.selectbox(
-                "Default Search Model",
-                ["Claude-3", "GPT-4", "Mixtral-8x7B"],
+                "Default Model",
+                ["Arxiv", "GPT-4o", "Claude-3.5"],
                 key="default_model"
             )
             st.number_input(
-                "Max Results",
-                min_value=5,
-                max_value=50,
-                value=20,
-                key="max_results"
+                "Max Response Length",
+                min_value=100,
+                max_value=2000,
+                value=1000,
+                key="max_tokens"
             )
 
         with col2:
-            st.subheader("Audio Settings")
+            st.subheader("Voice Settings")
             st.slider(
-                "Max Audio Duration (seconds)",
-                min_value=30,
-                max_value=300,
-                value=120,
-                step=30,
-                key="max_audio_duration"
+                "Pause Detection (ms)",
+                min_value=500,
+                max_value=3000,
+                value=1500,
+                step=100,
+                key="pause_threshold"
             )
             st.checkbox(
-                "High Quality Audio",
+                "High Quality Voice",
                 value=True,
                 key="high_quality_audio"
             )
-
-    # File manager sidebar
-    display_file_manager()
-
-    # Handle rerun if needed
-    if st.session_state.get('should_rerun', False):
-        st.session_state.should_rerun = False
-        st.rerun()
+
+# Cleanup utility
+def cleanup_old_files(days=7):
+    """Remove files older than specified days"""
+    current_time = time.time()
+    for pattern in ["*.md", "*.mp3"]:
+        for f in glob.glob(pattern):
+            creation_time = os.path.getctime(f)
+            if (current_time - creation_time) // (24 * 3600) >= days:
+                try:
+                    os.remove(f)
+                except:
+                    pass
 
 if __name__ == "__main__":
-    main()
+    if st.session_state.get('cleanup_enabled', False):
+        cleanup_old_files()
+    main()
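Note: the new `__main__` guard reads `st.session_state.get('cleanup_enabled', False)`, but the sidebar stores its "Auto Cleanup" checkbox in a local `cleanup_old` variable without a key, so that flag is never set. Wiring the checkbox to the key the guard actually checks would close the gap; a minimal sketch:

cleanup_old = st.checkbox(
    "Auto Cleanup",
    value=False,
    help="Remove old files automatically",
    key="cleanup_enabled"  # matches the st.session_state lookup in __main__
)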