import streamlit as st import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile import plotly.graph_objects as go import streamlit.components.v1 as components from datetime import datetime from audio_recorder_streamlit import audio_recorder from bs4 import BeautifulSoup from collections import defaultdict, deque from dotenv import load_dotenv from gradio_client import Client from huggingface_hub import InferenceClient from io import BytesIO from PIL import Image from PyPDF2 import PdfReader from urllib.parse import quote from xml.etree import ElementTree as ET from openai import OpenAI import extra_streamlit_components as stx from streamlit.runtime.scriptrunner import get_script_run_ctx import asyncio import edge_tts # Available English voices ENGLISH_VOICES = [ "en-US-AriaNeural", # Female, conversational "en-US-JennyNeural", # Female, customer service "en-US-GuyNeural", # Male, newscast "en-US-RogerNeural", # Male, calm "en-GB-SoniaNeural", # British female "en-GB-RyanNeural", # British male "en-AU-NatashaNeural", # Australian female "en-AU-WilliamNeural", # Australian male "en-CA-ClaraNeural", # Canadian female "en-CA-LiamNeural", # Canadian male "en-IE-EmilyNeural", # Irish female "en-IE-ConnorNeural", # Irish male "en-IN-NeerjaNeural", # Indian female "en-IN-PrabhatNeural", # Indian male ] # Core Configuration & Setup st.set_page_config( page_title="ARIA Research Assistant", page_icon="🔬", layout="wide", initial_sidebar_state="auto", menu_items={ 'Get Help': 'https://huggingface.co/awacke1', 'Report a bug': 'https://huggingface.co/spaces/awacke1', 'About': "ARIA: Academic Research Interactive Assistant" } ) load_dotenv() # API Setup openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', '')) anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', st.secrets.get('ANTHROPIC_API_KEY', '')) openai_client = OpenAI(api_key=openai_api_key) claude_client = anthropic.Anthropic(api_key=anthropic_key) # Session State Management if 'transcript_history' not in st.session_state: st.session_state['transcript_history'] = [] if 'chat_history' not in st.session_state: st.session_state['chat_history'] = [] if 'openai_model' not in st.session_state: st.session_state['openai_model'] = "gpt-4-vision-preview" if 'messages' not in st.session_state: st.session_state['messages'] = [] if 'last_voice_input' not in st.session_state: st.session_state['last_voice_input'] = "" if 'current_audio' not in st.session_state: st.session_state['current_audio'] = None if 'autoplay_audio' not in st.session_state: st.session_state['autoplay_audio'] = True if 'should_rerun' not in st.session_state: st.session_state['should_rerun'] = False if 'autorun' not in st.session_state: st.session_state.autorun = True if 'run_option' not in st.session_state: st.session_state.run_option = "Arxiv" if 'last_processed_text' not in st.session_state: st.session_state.last_processed_text = "" # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def create_voice_component(): """Create auto-searching voice recognition component""" return components.html( """
Starting voice recognition...
""", height=200 ) def get_audio_autoplay_html(audio_path): """Create HTML for autoplaying audio with controls and download""" try: with open(audio_path, "rb") as audio_file: audio_bytes = audio_file.read() audio_b64 = base64.b64encode(audio_bytes).decode() return f'''
⬇️ Download Audio
''' except Exception as e: return f"Error loading audio: {str(e)}" # Audio Processing Functions def clean_for_speech(text: str) -> str: """Clean text for speech synthesis""" text = text.replace("\n", " ") text = text.replace("", " ") text = text.replace("#", "") text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text) text = re.sub(r"\s+", " ", text).strip() return text async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"): """Generate audio using Edge TTS with automatic playback""" text = clean_for_speech(text) if not text.strip(): return None timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_file = f"response_{timestamp}.mp3" communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch) await communicate.save(output_file) return output_file def render_audio_result(audio_file, title="Generated Audio"): """Render audio result with autoplay in Streamlit""" if audio_file and os.path.exists(audio_file): st.markdown(f"### {title}") st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True) async def process_voice_search(query, voice="en-US-AriaNeural"): """Process voice search with automatic audio using selected voice""" response, refs = perform_arxiv_search(query) audio_file = await generate_audio(response, voice=voice) st.session_state.current_audio = audio_file return response, audio_file # Arxiv Search Functions def perform_arxiv_search(query): """Enhanced Arxiv search with summary""" client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern") refs = client.predict( query, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md" )[0] summary = client.predict( query, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm" ) response = f"### Search Results for: {query}\n\n{summary}\n\n### References\n\n{refs}" return response, refs def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, voice="en-US-AriaNeural"): """Full Arxiv search with audio summaries""" start = time.time() response, refs = perform_arxiv_search(q) st.markdown(response) # Generate audio responses if full_audio: audio_file = asyncio.run(generate_audio(response, voice=voice)) if audio_file: render_audio_result(audio_file, "Complete Response") if vocal_summary: summary_audio = asyncio.run(generate_audio( f"Summary of results for query: {q}", voice=voice )) if summary_audio: render_audio_result(summary_audio, "Summary") elapsed = time.time() - start st.write(f"**Total Elapsed:** {elapsed:.2f} s") return response def render_search_interface(): """Main search interface with voice recognition and model selection""" st.header("🔍 Voice Search & Research") # Voice and model settings col1, col2, col3 = st.columns([2, 1, 1]) with col1: selected_voice = st.selectbox( "Select Voice", ENGLISH_VOICES, index=0, help="Choose the voice for audio responses" ) with col2: run_option = st.selectbox( "Model:", ["Arxiv", "GPT-4o", "Claude-3.5"], key="run_option" ) with col3: autorun = st.checkbox("⚙ AutoRun", value=True, key="autorun") # Voice component voice_result = create_voice_component() # Handle voice input with autorun if voice_result and isinstance(voice_result, (str, dict)): # Extract text and trigger info if isinstance(voice_result, dict): current_text = voice_result.get('text', '') trigger = voice_result.get('trigger') else: current_text = voice_result trigger = None # Show text in edit box edited_input = st.text_area( "✏️ Edit Input:", value=current_text, height=100, key="edited_input" ) # Check if input has changed input_changed = (edited_input != st.session_state.get('last_processed_text', '')) # Process based on autorun and model selection if autorun and input_changed and edited_input: st.session_state.last_processed_text = edited_input try: with st.spinner("Processing..."): if run_option == "Arxiv": result = perform_ai_lookup( edited_input, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=True, voice=selected_voice ) elif run_option == "GPT-4o": result = process_with_gpt(edited_input) # Generate audio for GPT response audio_file = asyncio.run(generate_audio(result, voice=selected_voice)) if audio_file: render_audio_result(audio_file, "GPT-4 Response") elif run_option == "Claude-3.5": result = process_with_claude(edited_input) # Generate audio for Claude response audio_file = asyncio.run(generate_audio(result, voice=selected_voice)) if audio_file: render_audio_result(audio_file, "Claude Response") # Save to history st.session_state.transcript_history.append({ 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'query': edited_input, 'response': result, 'model': run_option }) except Exception as e: st.error(f"Error processing request: {str(e)}") # Manual run button elif st.button("▶ Run"): try: with st.spinner("Processing..."): if run_option == "Arxiv": result = perform_ai_lookup( edited_input, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=True, voice=selected_voice ) elif run_option == "GPT-4o": result = process_with_gpt(edited_input) audio_file = asyncio.run(generate_audio(result, voice=selected_voice)) if audio_file: render_audio_result(audio_file, "GPT-4 Response") elif run_option == "Claude-3.5": result = process_with_claude(edited_input) audio_file = asyncio.run(generate_audio(result, voice=selected_voice)) if audio_file: render_audio_result(audio_file, "Claude Response") # Save to history st.session_state.transcript_history.append({ 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'query': edited_input, 'response': result, 'model': run_option }) except Exception as e: st.error(f"Error processing request: {str(e)}") def main(): st.title("🔬 ARIA Research Assistant") # Initialize settings with st.sidebar: st.title("⚙️ Settings") # Audio Settings st.subheader("Audio Settings") st.session_state.autoplay_audio = st.checkbox( "Autoplay Audio", value=True, help="Automatically play audio when generated" ) rate = st.slider("Speech Rate", -50, 50, 0, 5) pitch = st.slider("Pitch", -50, 50, 0, 5) # Advanced Settings st.subheader("Advanced") save_history = st.checkbox( "Save History", value=True, help="Save transcripts and responses" ) cleanup_old = st.checkbox( "Auto Cleanup", value=False, help="Remove old files automatically" ) # Main content tabs tabs = st.tabs(["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Advanced"]) with tabs[0]: render_search_interface() with tabs[1]: st.header("Search History") if st.session_state.transcript_history: for entry in reversed(st.session_state.transcript_history): with st.expander( f"🔍 {entry['timestamp']} - {entry['query'][:50]}...", expanded=False ): st.markdown(f"**Model:** {entry['model']}") st.markdown(entry['response']) with tabs[2]: st.header("Media Files") media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"]) with media_tabs[0]: audio_files = glob.glob("*.mp3") if audio_files: for audio_file in sorted(audio_files, key=os.path.getmtime, reverse=True): st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True) else: st.write("No audio files found") with media_tabs[1]: video_files = glob.glob("*.mp4") if video_files: cols = st.columns(2) for idx, video_file in enumerate(video_files): with cols[idx % 2]: st.video(video_file) else: st.write("No video files found") with media_tabs[2]: image_files = glob.glob("*.png") + glob.glob("*.jpg") if image_files: cols = st.columns(3) for idx, image_file in enumerate(image_files): with cols[idx % 3]: st.image(Image.open(image_file), use_column_width=True) else: st.write("No images found") with tabs[3]: st.header("Advanced Settings") col1, col2 = st.columns(2) with col1: st.subheader("Model Settings") st.selectbox( "Default Model", ["Arxiv", "GPT-4o", "Claude-3.5"], key="default_model" ) st.number_input( "Max Response Length", min_value=100, max_value=2000, value=1000, key="max_tokens" ) with col2: st.subheader("Voice Settings") st.slider( "Pause Detection (ms)", min_value=500, max_value=3000, value=1500, step=100, key="pause_threshold" ) st.checkbox( "High Quality Voice", value=True, key="high_quality_audio" ) # Cleanup utility def cleanup_old_files(days=7): """Remove files older than specified days""" current_time = time.time() for pattern in ["*.md", "*.mp3"]: for f in glob.glob(pattern): creation_time = os.path.getctime(f) if (current_time - creation_time) // (24 * 3600) >= days: try: os.remove(f) except: pass if __name__ == "__main__": if st.session_state.get('cleanup_enabled', False): cleanup_old_files() main()