awacke1 committed
Commit 8c120d1 · verified
Parent(s): ffd10f7

Update app.py

Files changed (1)
  1. app.py +320 -430

app.py CHANGED
@@ -20,29 +20,46 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
 import asyncio
 import edge_tts
 
-# 1. Core Configuration & Setup
+# Available English voices
+ENGLISH_VOICES = [
+    "en-US-AriaNeural",     # Female, conversational
+    "en-US-JennyNeural",    # Female, customer service
+    "en-US-GuyNeural",      # Male, newscast
+    "en-US-RogerNeural",    # Male, calm
+    "en-GB-SoniaNeural",    # British female
+    "en-GB-RyanNeural",     # British male
+    "en-AU-NatashaNeural",  # Australian female
+    "en-AU-WilliamNeural",  # Australian male
+    "en-CA-ClaraNeural",    # Canadian female
+    "en-CA-LiamNeural",     # Canadian male
+    "en-IE-EmilyNeural",    # Irish female
+    "en-IE-ConnorNeural",   # Irish male
+    "en-IN-NeerjaNeural",   # Indian female
+    "en-IN-PrabhatNeural",  # Indian male
+]
+
+# Core Configuration & Setup
 st.set_page_config(
-    page_title="🚲BikeAI🏆 Research Assistant Pro",
-    page_icon="🚲🏆",
+    page_title="ARIA Research Assistant",
+    page_icon="🔬",
     layout="wide",
     initial_sidebar_state="auto",
     menu_items={
         'Get Help': 'https://huggingface.co/awacke1',
         'Report a bug': 'https://huggingface.co/spaces/awacke1',
-        'About': "Research Assistant Pro with Voice Search"
+        'About': "ARIA: Academic Research Interactive Assistant"
     }
 )
 load_dotenv()
 
-# 2. API Setup & Clients
+# API Setup
 openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ''))
 anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', st.secrets.get('ANTHROPIC_API_KEY', ''))
-hf_key = os.getenv('HF_KEY', st.secrets.get('HF_KEY', ''))
 
 openai_client = OpenAI(api_key=openai_api_key)
 claude_client = anthropic.Anthropic(api_key=anthropic_key)
 
-# 3. Session State Management
+# Session State Management
 if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'chat_history' not in st.session_state:
@@ -53,24 +70,30 @@ if 'messages' not in st.session_state:
     st.session_state['messages'] = []
 if 'last_voice_input' not in st.session_state:
     st.session_state['last_voice_input'] = ""
-if 'editing_file' not in st.session_state:
-    st.session_state['editing_file'] = None
 if 'current_audio' not in st.session_state:
     st.session_state['current_audio'] = None
 if 'autoplay_audio' not in st.session_state:
     st.session_state['autoplay_audio'] = True
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
-if 'old_val' not in st.session_state:
-    st.session_state['old_val'] = None
+if 'autorun' not in st.session_state:
+    st.session_state.autorun = True
+if 'run_option' not in st.session_state:
+    st.session_state.run_option = "Arxiv"
+if 'last_processed_text' not in st.session_state:
+    st.session_state.last_processed_text = ""
 
-# 4. Style Definitions
+# Custom CSS
 st.markdown("""
 <style>
-.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
-.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
+.main {
+    background: linear-gradient(135deg, #1a1a1a, #2d2d2d);
+    color: #ffffff;
+}
+.stMarkdown {
+    font-family: 'Helvetica Neue', sans-serif;
+}
 .stButton>button {
-    margin-right: 0.5rem;
     background-color: #4CAF50;
     color: white;
     padding: 0.5rem 1rem;
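Note: the run of `if ... not in st.session_state` guards above (in both versions) could be collapsed into one table-driven loop. A minimal sketch; the `SESSION_DEFAULTS` table and helper name are illustrative, not part of this commit:

import streamlit as st

SESSION_DEFAULTS = {
    'transcript_history': [],
    'chat_history': [],
    'messages': [],
    'last_voice_input': "",
    'current_audio': None,
    'autoplay_audio': True,
    'should_rerun': False,
    'autorun': True,
    'run_option': "Arxiv",
    'last_processed_text': "",
}

def init_session_state(defaults=SESSION_DEFAULTS):
    # st.session_state behaves like a dict, so setdefault only fills missing keys.
    for key, value in defaults.items():
        st.session_state.setdefault(key, value)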
@@ -85,27 +108,37 @@ st.markdown("""
     margin: 1rem 0;
     padding: 1rem;
     border-radius: 10px;
-    background: white;
+    background: #f5f5f5;
     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 }
-.file-manager {
+.voice-container {
     padding: 1rem;
     background: white;
     border-radius: 10px;
     margin: 1rem 0;
 }
+.text-display {
+    margin: 1rem 0;
+    padding: 1rem;
+    background: #f9f9f9;
+    border-radius: 5px;
+    font-size: 1.1em;
+}
+.model-selector {
+    margin: 1rem 0;
+    padding: 0.5rem;
+    background: #ffffff;
+    border-radius: 5px;
+}
+.response-container {
+    margin-top: 2rem;
+    padding: 1rem;
+    background: rgba(255, 255, 255, 0.05);
+    border-radius: 10px;
+}
 </style>
 """, unsafe_allow_html=True)
 
-FILE_EMOJIS = {
-    "md": "📝",
-    "mp3": "🎵",
-    "mp4": "🎥",
-    "png": "🖼️",
-    "jpg": "📸"
-}
-
-# 5. Voice Recognition Component
 def create_voice_component():
     """Create auto-searching voice recognition component"""
     return components.html(
@@ -126,9 +159,8 @@ def create_voice_component():
         const output = document.getElementById('output');
         let fullTranscript = '';
         let lastPauseTime = Date.now();
-        let pauseThreshold = 1500; // Time in ms to wait before triggering search
+        let pauseThreshold = 1500;
 
-        // Auto-start on load
         window.addEventListener('load', () => {
             setTimeout(() => {
                 try {
@@ -160,10 +192,12 @@ def create_voice_component():
                 interim.textContent = '';
                 output.textContent = fullTranscript;
 
-                // Send to Streamlit for processing
                 window.parent.postMessage({
                     type: 'streamlit:setComponentValue',
-                    value: fullTranscript,
+                    value: {
+                        text: fullTranscript,
+                        trigger: 'speech'
+                    },
                     dataType: 'json',
                 }, '*');
             } else if (interimTranscript) {
@@ -173,7 +207,6 @@ def create_voice_component():
             output.scrollTop = output.scrollHeight;
         };
 
-        // Check for pauses and trigger search
         setInterval(() => {
             if (fullTranscript && Date.now() - lastPauseTime > pauseThreshold) {
                 if (output.dataset.lastProcessed !== fullTranscript) {
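Note: after this change the component posts a JSON object through `streamlit:setComponentValue` instead of a bare string, so the Python side has to accept both shapes. A small normalizing sketch matching the handling in `render_search_interface` below; the helper name is hypothetical:

def normalize_component_value(voice_result):
    # New payload: {'text': ..., 'trigger': e.g. 'speech' or 'pause'}; old payload: plain string.
    if isinstance(voice_result, dict):
        return voice_result.get('text', ''), voice_result.get('trigger')
    if isinstance(voice_result, str):
        return voice_result, None
    return '', None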
@@ -213,127 +246,31 @@ def create_voice_component():
         height=200
     )
 
-# Available English voices
-ENGLISH_VOICES = [
-    "en-US-AriaNeural",     # Female, conversational
-    "en-US-JennyNeural",    # Female, customer service
-    "en-US-GuyNeural",      # Male, newscast
-    "en-US-RogerNeural",    # Male, calm
-    "en-GB-SoniaNeural",    # British female
-    "en-GB-RyanNeural",     # British male
-    "en-AU-NatashaNeural",  # Australian female
-    "en-AU-WilliamNeural",  # Australian male
-    "en-CA-ClaraNeural",    # Canadian female
-    "en-CA-LiamNeural",     # Canadian male
-    "en-IE-EmilyNeural",    # Irish female
-    "en-IE-ConnorNeural",   # Irish male
-    "en-IN-NeerjaNeural",   # Indian female
-    "en-IN-PrabhatNeural",  # Indian male
-]
-
-def render_search_interface():
-    """Render main search interface with auto-search voice component"""
-    st.header("🔍 Voice Search")
-
-    # Voice settings
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        selected_voice = st.selectbox(
-            "Select Voice",
-            ENGLISH_VOICES,
-            index=0,
-            help="Choose the voice for audio responses"
-        )
-    with col2:
-        auto_search = st.checkbox("Auto-Search on Pause", value=True)
-
-    # Voice component
-    voice_result = create_voice_component()
-
-    # Handle voice input
-    if voice_result and isinstance(voice_result, (str, dict)):
-        # Extract text and trigger info
-        if isinstance(voice_result, dict):
-            current_text = voice_result.get('text', '')
-            trigger = voice_result.get('trigger')
-        else:
-            current_text = voice_result
-            trigger = None
-
-        # Process on pause trigger if enabled
-        if auto_search and trigger == 'pause' and current_text:
-            if current_text != st.session_state.get('last_processed_text', ''):
-                st.session_state.last_processed_text = current_text
-
-                # Show the detected text
-                st.info(f"🎤 Detected: {current_text}")
-
-                # Perform search
-                try:
-                    with st.spinner("Searching and generating audio response..."):
-                        response, audio_file = asyncio.run(
-                            process_voice_search(
-                                current_text,
-                                voice=selected_voice
-                            )
-                        )
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file, "Search Results")
-
-                    # Save to history
-                    st.session_state.transcript_history.append({
-                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        'query': current_text,
-                        'response': response,
-                        'audio': audio_file
-                    })
-                except Exception as e:
-                    st.error(f"Error processing search: {str(e)}")
-
-    # Manual search option
-    with st.expander("📝 Manual Search", expanded=False):
-        query = st.text_input("Search Query:", value=st.session_state.get('last_processed_text', ''))
-        if st.button("🔍 Search"):
-            try:
-                with st.spinner("Searching and generating audio..."):
-                    response, audio_file = asyncio.run(
-                        process_voice_search(
-                            query,
-                            voice=selected_voice
-                        )
-                    )
-                if response:
-                    st.markdown(response)
-                if audio_file:
-                    render_audio_result(audio_file)
-            except Exception as e:
-                st.error(f"Error processing search: {str(e)}")
-
-# 6. Audio Processing Functions
-def get_autoplay_audio_html(audio_path, width="100%"):
-    """Create HTML for autoplaying audio with controls"""
+def get_audio_autoplay_html(audio_path):
+    """Create HTML for autoplaying audio with controls and download"""
     try:
         with open(audio_path, "rb") as audio_file:
             audio_bytes = audio_file.read()
         audio_b64 = base64.b64encode(audio_bytes).decode()
         return f'''
-        <audio controls autoplay style="width: {width};">
-            <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
-            Your browser does not support the audio element.
-        </audio>
-        <div style="margin-top: 5px;">
-            <a href="data:audio/mpeg;base64,{audio_b64}"
-               download="{os.path.basename(audio_path)}"
-               style="text-decoration: none;">
-               ⬇️ Download Audio
-            </a>
+        <div class="audio-player">
+            <audio controls autoplay style="width: 100%;">
+                <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
+                Your browser does not support the audio element.
+            </audio>
+            <div style="margin-top: 5px;">
+                <a href="data:audio/mpeg;base64,{audio_b64}"
+                   download="{os.path.basename(audio_path)}"
+                   style="text-decoration: none; color: #4CAF50;">
+                   ⬇️ Download Audio
+                </a>
+            </div>
         </div>
         '''
     except Exception as e:
         return f"Error loading audio: {str(e)}"
 
+# Audio Processing Functions
 def clean_for_speech(text: str) -> str:
     """Clean text for speech synthesis"""
     text = text.replace("\n", " ")
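Note: both the old and new audio helpers embed the MP3 bytes as a base64 data URI, so a single string drives both the `<audio>` source and the download link with no extra request. The same idea reduced to its core; the function name is illustrative:

import base64

def audio_data_uri(path):
    # One read of the file serves both playback and download.
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f"data:audio/mpeg;base64,{b64}"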
@@ -344,7 +281,7 @@ def clean_for_speech(text: str) -> str:
     return text
 
 async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
-    """Generate audio using Edge TTS"""
+    """Generate audio using Edge TTS with automatic playback"""
     text = clean_for_speech(text)
     if not text.strip():
         return None
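Note: the diff elides the body of `generate_audio` after the empty-text guard. With the edge_tts package, the synthesis step typically looks like the sketch below; the output filename scheme is an assumption:

import asyncio
from datetime import datetime
import edge_tts

async def generate_audio_sketch(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    # edge_tts.Communicate takes rate/pitch as signed strings like "+10%" / "-5Hz".
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    out_file = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"  # assumed naming
    await communicate.save(out_file)
    return out_file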
@@ -361,38 +298,24 @@ def render_audio_result(audio_file, title="Generated Audio"):
     """Render audio result with autoplay in Streamlit"""
     if audio_file and os.path.exists(audio_file):
         st.markdown(f"### {title}")
-        st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
-
-# 7. File Operations
-def generate_filename(text, response="", file_type="md"):
-    """Generate intelligent filename"""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    safe_text = re.sub(r'[^\w\s-]', '', text[:50])
-    return f"{timestamp}_{safe_text}.{file_type}"
-
-def create_file(text, response, file_type="md"):
-    """Create file with content"""
-    filename = generate_filename(text, response, file_type)
-    with open(filename, 'w', encoding='utf-8') as f:
-        f.write(f"{text}\n\n{response}")
-    return filename
-
-def get_download_link(file_path):
-    """Generate download link for file"""
-    with open(file_path, "rb") as file:
-        contents = file.read()
-    b64 = base64.b64encode(contents).decode()
-    file_name = os.path.basename(file_path)
-    return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">⬇️ Download {file_name}</a>'
-
-# 8. Search and Process Functions
-def perform_arxiv_search(query, response_type="summary"):
-    """Enhanced Arxiv search with voice response"""
+        st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True)
+
+async def process_voice_search(query, voice="en-US-AriaNeural"):
+    """Process voice search with automatic audio using selected voice"""
+    response, refs = perform_arxiv_search(query)
+
+    audio_file = await generate_audio(response, voice=voice)
+    st.session_state.current_audio = audio_file
+
+    return response, audio_file
+
+# Arxiv Search Functions
+def perform_arxiv_search(query):
+    """Enhanced Arxiv search with summary"""
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
 
-    # Get search results and AI interpretation
     refs = client.predict(
         query, 20, "Semantic Search",
         "mistralai/Mixtral-8x7B-Instruct-v0.1",
         api_name="/update_with_rag_md"
     )[0]
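Note: both versions drive the Arxiv RAG Space through gradio_client: the positional arguments line up with the endpoint's inputs and `api_name` selects the endpoint. A standalone sketch of the same call; the query string is illustrative:

from gradio_client import Client

client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
refs = client.predict(
    "transformer architectures",             # search query (illustrative)
    20,                                      # number of results
    "Semantic Search",                       # search mode
    "mistralai/Mixtral-8x7B-Instruct-v0.1",  # model used for the RAG summary
    api_name="/update_with_rag_md"
)[0]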
@@ -404,279 +327,237 @@ def perform_arxiv_search(query, response_type="summary"):
         api_name="/ask_llm"
     )
 
-    # Format response
-    response = f"### 🔎 Search Results for: {query}\n\n{summary}\n\n### 📚 References\n\n{refs}"
-
+    response = f"### Search Results for: {query}\n\n{summary}\n\n### References\n\n{refs}"
     return response, refs
 
-async def process_voice_search(query):
-    """Process voice search with automatic audio"""
-    response, refs = perform_arxiv_search(query)
-
-    # Generate audio from response
-    audio_file = await generate_audio(response)
-
-    # Update state
-    st.session_state.current_audio = audio_file
-
-    return response, audio_file
-
-def process_with_gpt(text):
-    """Process text with GPT-4"""
-    if not text:
-        return
-
-    st.session_state.messages.append({"role": "user", "content": text})
-
-    with st.chat_message("user"):
-        st.markdown(text)
-
-    with st.chat_message("assistant"):
-        response = openai_client.chat.completions.create(
-            model=st.session_state.openai_model,
-            messages=st.session_state.messages,
-            stream=False
-        )
-
-        answer = response.choices[0].message.content
-        st.write(f"GPT-4: {answer}")
-
-        # Generate audio response
-        audio_file = asyncio.run(generate_audio(answer))
-        if audio_file:
-            render_audio_result(audio_file, "GPT-4 Response")
-
-        # Save response
-        create_file(text, answer, "md")
-        st.session_state.messages.append({"role": "assistant", "content": answer})
-
-    return answer
-
-def process_with_claude(text):
-    """Process text with Claude"""
-    if not text:
-        return
-
-    with st.chat_message("user"):
-        st.markdown(text)
-
-    with st.chat_message("assistant"):
-        response = claude_client.messages.create(
-            model="claude-3-sonnet-20240229",
-            max_tokens=1000,
-            messages=[{"role": "user", "content": text}]
-        )
-
-        answer = response.content[0].text
-        st.write(f"Claude-3: {answer}")
-
-        # Generate audio response
-        audio_file = asyncio.run(generate_audio(answer))
-        if audio_file:
-            render_audio_result(audio_file, "Claude Response")
-
-        # Save response
-        create_file(text, answer, "md")
-        st.session_state.chat_history.append({"user": text, "claude": answer})
-
-    return answer
-
-# 9. UI Components
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True,
+                      full_audio=False, voice="en-US-AriaNeural"):
+    """Full Arxiv search with audio summaries"""
+    start = time.time()
+    response, refs = perform_arxiv_search(q)
+
+    st.markdown(response)
+
+    # Generate audio responses
+    if full_audio:
+        audio_file = asyncio.run(generate_audio(response, voice=voice))
+        if audio_file:
+            render_audio_result(audio_file, "Complete Response")
+
+    if vocal_summary:
+        summary_audio = asyncio.run(generate_audio(
+            f"Summary of results for query: {q}",
+            voice=voice
+        ))
+        if summary_audio:
+            render_audio_result(summary_audio, "Summary")
+
+    elapsed = time.time() - start
+    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+
+    return response
+
 def render_search_interface():
-    """Render main search interface with voice component"""
-    st.header("🔍 Voice Search")
-
-    # Voice component with autorun
-    voice_text = create_voice_component()
-
-    # Handle voice input
-    if voice_text and isinstance(voice_text, (str, dict)):
-        # Convert dict to string if necessary
-        current_text = voice_text if isinstance(voice_text, str) else voice_text.get('value', '')
-
-        # Compare with last processed text
-        if current_text and current_text != st.session_state.get('last_voice_text', ''):
-            st.session_state.last_voice_text = current_text
-
-            # Clean the text
-            cleaned_text = current_text.replace('\n', ' ').strip()
-
-            # Process with selected model
-            if st.session_state.autoplay_audio and cleaned_text:
-                try:
-                    response, audio_file = asyncio.run(process_voice_search(cleaned_text))
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file, "Search Results")
-                except Exception as e:
-                    st.error(f"Error processing voice search: {str(e)}")
-
-    # Manual search option
-    with st.expander("📝 Manual Search", expanded=False):
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            query = st.text_input("Enter search query:")
-        with col2:
-            if st.button("🔍 Search"):
-                try:
-                    response, audio_file = asyncio.run(process_voice_search(query))
-                    if response:
-                        st.markdown(response)
-                    if audio_file:
-                        render_audio_result(audio_file)
-                except Exception as e:
-                    st.error(f"Error processing search: {str(e)}")
-
-def display_file_manager():
-    """Display file manager with media preview"""
-    st.sidebar.title("📁 File Manager")
-
-    files = {
-        'Documents': glob.glob("*.md"),
-        'Audio': glob.glob("*.mp3"),
-        'Video': glob.glob("*.mp4"),
-        'Images': glob.glob("*.png") + glob.glob("*.jpg")
-    }
-
-    # Top actions
-    col1, col2 = st.sidebar.columns(2)
+    """Main search interface with voice recognition and model selection"""
+    st.header("🔍 Voice Search & Research")
+
+    # Voice and model settings
+    col1, col2, col3 = st.columns([2, 1, 1])
     with col1:
-        if st.button("🗑 Delete All"):
-            for category in files.values():
-                for file in category:
-                    os.remove(file)
-            st.rerun()
-
+        selected_voice = st.selectbox(
+            "Select Voice",
+            ENGLISH_VOICES,
+            index=0,
+            help="Choose the voice for audio responses"
+        )
     with col2:
-        if st.button("⬇️ Download All"):
-            zip_name = f"archive_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
-            with zipfile.ZipFile(zip_name, 'w') as zipf:
-                for category in files.values():
-                    for file in category:
-                        zipf.write(file)
-            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
-
-    # Display files by category
-    for category, category_files in files.items():
-        if category_files:
-            with st.sidebar.expander(f"{FILE_EMOJIS.get(category.lower(), '📄')} {category} ({len(category_files)})", expanded=True):
-                for file in sorted(category_files, key=os.path.getmtime, reverse=True):
-                    col1, col2, col3 = st.columns([3, 1, 1])
-                    with col1:
-                        st.markdown(f"**{os.path.basename(file)}**")
-                    with col2:
-                        st.markdown(get_download_link(file), unsafe_allow_html=True)
-                    with col3:
-                        if st.button("🗑", key=f"del_{file}"):
-                            os.remove(file)
-                            st.rerun()
-
-def display_media_gallery():
-    """Display media files in gallery format"""
-    media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
-
-    with media_tabs[0]:
-        audio_files = glob.glob("*.mp3")
-        if audio_files:
-            for audio_file in audio_files:
-                st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
+        run_option = st.selectbox(
+            "Model:",
+            ["Arxiv", "GPT-4o", "Claude-3.5"],
+            key="run_option"
+        )
+    with col3:
+        autorun = st.checkbox("⚙ AutoRun", value=True, key="autorun")
+
+    # Voice component
+    voice_result = create_voice_component()
+
+    # Handle voice input with autorun
+    if voice_result and isinstance(voice_result, (str, dict)):
+        # Extract text and trigger info
+        if isinstance(voice_result, dict):
+            current_text = voice_result.get('text', '')
+            trigger = voice_result.get('trigger')
         else:
-            st.write("No audio files found")
-
-    with media_tabs[1]:
-        video_files = glob.glob("*.mp4")
-        if video_files:
-            cols = st.columns(2)
-            for idx, video_file in enumerate(video_files):
-                with cols[idx % 2]:
-                    st.video(video_file)
-        else:
-            st.write("No video files found")
-
-    with media_tabs[2]:
-        image_files = glob.glob("*.png") + glob.glob("*.jpg")
-        if image_files:
-            cols = st.columns(3)
-            for idx, image_file in enumerate(image_files):
-                with cols[idx % 3]:
-                    st.image(Image.open(image_file), use_column_width=True)
-                    if st.button(f"Analyze {os.path.basename(image_file)}", key=f"analyze_{image_file}"):
-                        with st.spinner("Analyzing image..."):
-                            analysis = process_with_gpt(f"Analyze this image: {image_file}")
-                            st.markdown(analysis)
-        else:
-            st.write("No images found")
-
-def display_search_history():
-    """Display search history with audio playback"""
-    st.header("Search History")
-
-    history_tabs = st.tabs(["🔍 Voice Searches", "💬 Chat History"])
-
-    with history_tabs[0]:
-        for entry in reversed(st.session_state.transcript_history):
-            with st.expander(f"🔍 {entry['timestamp']} - {entry['query'][:50]}...", expanded=False):
-                st.markdown(entry['response'])
-                if entry.get('audio'):
-                    render_audio_result(entry['audio'], "Recorded Response")
-
-    with history_tabs[1]:
-        chat_tabs = st.tabs(["Claude History", "GPT-4 History"])
-        with chat_tabs[0]:
-            for chat in st.session_state.chat_history:
-                st.markdown(f"**You:** {chat['user']}")
-                st.markdown(f"**Claude:** {chat['claude']}")
-                st.markdown("---")
-        with chat_tabs[1]:
-            for msg in st.session_state.messages:
-                with st.chat_message(msg["role"]):
-                    st.markdown(msg["content"])
-
-# Main Application
+            current_text = voice_result
+            trigger = None
+
+        # Show text in edit box
+        edited_input = st.text_area(
+            "✏️ Edit Input:",
+            value=current_text,
+            height=100,
+            key="edited_input"
+        )
+
+        # Check if input has changed
+        input_changed = (edited_input != st.session_state.get('last_processed_text', ''))
+
+        # Process based on autorun and model selection
+        if autorun and input_changed and edited_input:
+            st.session_state.last_processed_text = edited_input
+
+            try:
+                with st.spinner("Processing..."):
+                    if run_option == "Arxiv":
+                        result = perform_ai_lookup(
+                            edited_input,
+                            vocal_summary=True,
+                            extended_refs=False,
+                            titles_summary=True,
+                            full_audio=True,
+                            voice=selected_voice
+                        )
+                    elif run_option == "GPT-4o":
+                        result = process_with_gpt(edited_input)
+                        # Generate audio for GPT response
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "GPT-4 Response")
+                    elif run_option == "Claude-3.5":
+                        result = process_with_claude(edited_input)
+                        # Generate audio for Claude response
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "Claude Response")
+
+                    # Save to history
+                    st.session_state.transcript_history.append({
+                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        'query': edited_input,
+                        'response': result,
+                        'model': run_option
+                    })
+
+            except Exception as e:
+                st.error(f"Error processing request: {str(e)}")
+
+        # Manual run button
+        elif st.button("▶ Run"):
+            try:
+                with st.spinner("Processing..."):
+                    if run_option == "Arxiv":
+                        result = perform_ai_lookup(
+                            edited_input,
+                            vocal_summary=True,
+                            extended_refs=False,
+                            titles_summary=True,
+                            full_audio=True,
+                            voice=selected_voice
+                        )
+                    elif run_option == "GPT-4o":
+                        result = process_with_gpt(edited_input)
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "GPT-4 Response")
+                    elif run_option == "Claude-3.5":
+                        result = process_with_claude(edited_input)
+                        audio_file = asyncio.run(generate_audio(result, voice=selected_voice))
+                        if audio_file:
+                            render_audio_result(audio_file, "Claude Response")
+
+                    # Save to history
+                    st.session_state.transcript_history.append({
+                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                        'query': edited_input,
+                        'response': result,
+                        'model': run_option
+                    })
+
+            except Exception as e:
+                st.error(f"Error processing request: {str(e)}")
+
 def main():
-    st.title("🔬 Research Assistant Pro")
-
-    # Initialize autorun setting
-    if 'autorun' not in st.session_state:
-        st.session_state.autorun = True
-
-    # Settings sidebar
+    st.title("🔬 ARIA Research Assistant")
+
+    # Initialize settings
     with st.sidebar:
         st.title("⚙️ Settings")
-        st.session_state.autorun = st.checkbox("Enable Autorun", value=True)
-
-        st.subheader("Voice Settings")
-        voice_options = [
-            "en-US-AriaNeural",
-            "en-US-GuyNeural",
-            "en-GB-SoniaNeural",
-            "en-AU-NatashaNeural"
-        ]
-        selected_voice = st.selectbox("Select Voice", voice_options)
-
+
+        # Audio Settings
         st.subheader("Audio Settings")
-        rate = st.slider("Speech Rate", -50, 50, 0, 5)
-        pitch = st.slider("Pitch", -50, 50, 0, 5)
-
         st.session_state.autoplay_audio = st.checkbox(
             "Autoplay Audio",
             value=True,
             help="Automatically play audio when generated"
         )
+
+        rate = st.slider("Speech Rate", -50, 50, 0, 5)
+        pitch = st.slider("Pitch", -50, 50, 0, 5)
+
+        # Advanced Settings
+        st.subheader("Advanced")
+        save_history = st.checkbox(
+            "Save History",
+            value=True,
+            help="Save transcripts and responses"
+        )
+        cleanup_old = st.checkbox(
+            "Auto Cleanup",
+            value=False,
+            help="Remove old files automatically"
+        )
 
     # Main content tabs
     tabs = st.tabs(["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Advanced"])
 
     with tabs[0]:
         render_search_interface()
 
     with tabs[1]:
-        display_search_history()
-
+        st.header("Search History")
+        if st.session_state.transcript_history:
+            for entry in reversed(st.session_state.transcript_history):
+                with st.expander(
+                    f"🔍 {entry['timestamp']} - {entry['query'][:50]}...",
+                    expanded=False
+                ):
+                    st.markdown(f"**Model:** {entry['model']}")
+                    st.markdown(entry['response'])
+
     with tabs[2]:
-        display_media_gallery()
+        st.header("Media Files")
+        media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
+
+        with media_tabs[0]:
+            audio_files = glob.glob("*.mp3")
+            if audio_files:
+                for audio_file in sorted(audio_files, key=os.path.getmtime, reverse=True):
+                    st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True)
+            else:
+                st.write("No audio files found")
+
+        with media_tabs[1]:
+            video_files = glob.glob("*.mp4")
+            if video_files:
+                cols = st.columns(2)
+                for idx, video_file in enumerate(video_files):
+                    with cols[idx % 2]:
+                        st.video(video_file)
+            else:
+                st.write("No video files found")
+
+        with media_tabs[2]:
+            image_files = glob.glob("*.png") + glob.glob("*.jpg")
+            if image_files:
+                cols = st.columns(3)
+                for idx, image_file in enumerate(image_files):
+                    with cols[idx % 3]:
+                        st.image(Image.open(image_file), use_column_width=True)
+            else:
+                st.write("No images found")
 
     with tabs[3]:
         st.header("Advanced Settings")
@@ -684,41 +565,50 @@ def main():
         with col1:
             st.subheader("Model Settings")
             st.selectbox(
-                "Default Search Model",
-                ["Claude-3", "GPT-4", "Mixtral-8x7B"],
+                "Default Model",
+                ["Arxiv", "GPT-4o", "Claude-3.5"],
                 key="default_model"
             )
             st.number_input(
-                "Max Results",
-                min_value=5,
-                max_value=50,
-                value=20,
-                key="max_results"
+                "Max Response Length",
+                min_value=100,
+                max_value=2000,
+                value=1000,
+                key="max_tokens"
             )
 
         with col2:
-            st.subheader("Audio Settings")
+            st.subheader("Voice Settings")
             st.slider(
-                "Max Audio Duration (seconds)",
-                min_value=30,
-                max_value=300,
-                value=120,
-                step=30,
-                key="max_audio_duration"
+                "Pause Detection (ms)",
+                min_value=500,
+                max_value=3000,
+                value=1500,
+                step=100,
+                key="pause_threshold"
             )
             st.checkbox(
-                "High Quality Audio",
+                "High Quality Voice",
                 value=True,
                 key="high_quality_audio"
             )
-
-    # File manager sidebar
-    display_file_manager()
-
-    # Handle rerun if needed
-    if st.session_state.get('should_rerun', False):
-        st.session_state.should_rerun = False
-        st.rerun()
+
+# Cleanup utility
+def cleanup_old_files(days=7):
+    """Remove files older than specified days"""
+    current_time = time.time()
+    for pattern in ["*.md", "*.mp3"]:
+        for f in glob.glob(pattern):
+            creation_time = os.path.getctime(f)
+            if (current_time - creation_time) // (24 * 3600) >= days:
+                try:
+                    os.remove(f)
+                except:
+                    pass
 
 if __name__ == "__main__":
-    main()
+    if st.session_state.get('cleanup_enabled', False):
+        cleanup_old_files()
+    main()
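Note: the new `__main__` guard reads `st.session_state.get('cleanup_enabled', False)`, but the sidebar stores its "Auto Cleanup" checkbox in a local `cleanup_old` variable without a key, so that flag is never set. Wiring the checkbox to the key the guard actually checks would close the gap; a minimal sketch:

cleanup_old = st.checkbox(
    "Auto Cleanup",
    value=False,
    help="Remove old files automatically",
    key="cleanup_enabled"  # matches the st.session_state lookup in __main__
)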