import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict, deque
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts

# Available English voices
ENGLISH_VOICES = [
    "en-US-AriaNeural",     # Female, conversational
    "en-US-JennyNeural",    # Female, customer service
    "en-US-GuyNeural",      # Male, newscast
    "en-US-RogerNeural",    # Male, calm
    "en-GB-SoniaNeural",    # British female
    "en-GB-RyanNeural",     # British male
    "en-AU-NatashaNeural",  # Australian female
    "en-AU-WilliamNeural",  # Australian male
    "en-CA-ClaraNeural",    # Canadian female
    "en-CA-LiamNeural",     # Canadian male
    "en-IE-EmilyNeural",    # Irish female
    "en-IE-ConnorNeural",   # Irish male
    "en-IN-NeerjaNeural",   # Indian female
    "en-IN-PrabhatNeural",  # Indian male
]

# Core Configuration & Setup
st.set_page_config(
    page_title="ARIA Research Assistant",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "ARIA: Academic Research Interactive Assistant"
    }
)
load_dotenv()

# API Setup: environment variables take precedence over Streamlit secrets
openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ''))
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', st.secrets.get('ANTHROPIC_API_KEY', ''))
openai_client = OpenAI(api_key=openai_api_key)
claude_client = anthropic.Anthropic(api_key=anthropic_key)
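# Expected .env layout (key names taken from the os.getenv calls above;
# values illustrative):
#   OPENAI_API_KEY=sk-...
#   ANTHROPIC_API_KEY_3=sk-ant-...
# Minimal guard so a missing key surfaces in the UI instead of as a stack trace:
if not openai_api_key or not anthropic_key:
    st.warning("⚠️ OPENAI_API_KEY or ANTHROPIC_API_KEY_3 not configured; model calls will fail.")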
# Session State Management
if 'transcript_history' not in st.session_state:
    st.session_state['transcript_history'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
if 'openai_model' not in st.session_state:
    st.session_state['openai_model'] = "gpt-4-vision-preview"
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'last_voice_input' not in st.session_state:
    st.session_state['last_voice_input'] = ""
if 'current_audio' not in st.session_state:
    st.session_state['current_audio'] = None
if 'autoplay_audio' not in st.session_state:
    st.session_state['autoplay_audio'] = True
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False
if 'autorun' not in st.session_state:
    st.session_state.autorun = True
if 'run_option' not in st.session_state:
    st.session_state.run_option = "Arxiv"
if 'last_processed_text' not in st.session_state:
    st.session_state.last_processed_text = ""
if 'viewing_prefix' not in st.session_state:
    st.session_state.viewing_prefix = None  # file group opened from the sidebar

# Custom CSS
st.markdown("""
<style>
</style>
""", unsafe_allow_html=True)

def create_voice_component():
    """Create auto-searching voice recognition component"""
    return components.html(
        """
        <div id="status">Starting voice recognition...</div>
        """,
        height=200
    )
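# A hedged sketch of the browser-side Web Speech API markup that a component
# like create_voice_component() typically embeds; it could be passed to
# components.html() in place of the plain status div above. The element ids
# and the transcript wiring are illustrative assumptions, and
# webkitSpeechRecognition is Chromium-only.
VOICE_RECOGNITION_HTML_SKETCH = """
<div id="status">Starting voice recognition...</div>
<div id="output"></div>
<script>
    const status = document.getElementById('status');
    const output = document.getElementById('output');
    if ('webkitSpeechRecognition' in window) {
        const recognition = new webkitSpeechRecognition();
        recognition.continuous = true;
        recognition.interimResults = true;
        recognition.onstart = () => { status.textContent = 'Listening...'; };
        recognition.onresult = (event) => {
            let transcript = '';
            for (const result of event.results) {
                transcript += result[0].transcript;
            }
            output.textContent = transcript;
        };
        recognition.start();
    } else {
        status.textContent = 'Speech recognition is not supported in this browser.';
    }
</script>
"""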
""", height=200 ) def get_audio_autoplay_html(audio_path): """Create HTML for autoplaying audio with controls and download""" try: with open(audio_path, "rb") as audio_file: audio_bytes = audio_file.read() audio_b64 = base64.b64encode(audio_bytes).decode() return f'''
# Audio Processing Functions
def clean_for_speech(text: str) -> str:
    """Clean text for speech synthesis"""
    text = text.replace("\n", " ")
    text = text.replace("</s>", " ")  # drop end-of-sequence tokens some models emit
    text = text.replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)  # strip parenthesized URLs
    text = re.sub(r"\s+", " ", text).strip()
    return text

async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    """Generate audio using Edge TTS with automatic playback"""
    text = clean_for_speech(text)
    if not text.strip():
        return None
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"response_{timestamp}.mp3"
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await communicate.save(output_file)
    return output_file

def render_audio_result(audio_file, title="Generated Audio"):
    """Render audio result with autoplay in Streamlit"""
    if audio_file and os.path.exists(audio_file):
        st.markdown(f"### {title}")
        st.markdown(get_audio_autoplay_html(audio_file), unsafe_allow_html=True)
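# Usage sketch for the helpers above (sample text and voice are illustrative).
# Streamlit scripts run synchronously, so the async generate_audio() is driven
# with asyncio.run(), the same pattern used further down in this file:
#
#   audio_path = asyncio.run(generate_audio("Hello from ARIA", voice=ENGLISH_VOICES[0]))
#   render_audio_result(audio_path, "Demo")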
async def process_voice_search(query, voice="en-US-AriaNeural"):
    """Process voice search with automatic audio using selected voice"""
    response, refs = perform_arxiv_search(query)
    audio_file = await generate_audio(response, voice=voice)
    st.session_state.current_audio = audio_file
    return response, audio_file

# Arxiv Search Functions
def perform_arxiv_search(query):
    """Enhanced Arxiv search with summary"""
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]
    summary = client.predict(
        query,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )
    response = f"### Search Results for: {query}\n\n{summary}\n\n### References\n\n{refs}"
    return response, refs

def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False, voice="en-US-AriaNeural"):
    """Full Arxiv search with audio summaries"""
    start = time.time()
    response, refs = perform_arxiv_search(q)
    st.markdown(response)

    # Generate audio responses
    if full_audio:
        audio_file = asyncio.run(generate_audio(response, voice=voice))
        if audio_file:
            render_audio_result(audio_file, "Complete Response")
    if vocal_summary:
        summary_audio = asyncio.run(generate_audio(
            f"Summary of results for query: {q}", voice=voice
        ))
        if summary_audio:
            render_audio_result(summary_audio, "Summary")

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return response
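# render_search_interface() below dispatches to process_with_gpt() and
# process_with_claude(), which are not defined in this section. Minimal
# sketches follow, using the clients configured above; the Claude model id
# and token limits are assumptions, not the original implementations.
def process_with_gpt(prompt):
    """Send a prompt to the configured OpenAI model and render the reply."""
    response = openai_client.chat.completions.create(
        model=st.session_state['openai_model'],
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1000,
    )
    answer = response.choices[0].message.content
    st.markdown(answer)
    st.session_state.messages.append({"role": "assistant", "content": answer})
    return answer

def process_with_claude(prompt):
    """Send a prompt to Claude and render the reply."""
    response = claude_client.messages.create(
        model="claude-3-5-sonnet-20241022",  # assumed model id
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )
    answer = response.content[0].text
    st.markdown(answer)
    st.session_state.chat_history.append({"user": prompt, "claude": answer})
    return answer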
def render_search_interface():
    """Main search interface with voice recognition and model selection"""
    st.header("🔍 Voice Search & Research")

    # Get voice component value and set up model selection
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")

    # Show input in edit box if detected
    if val:
        val_stripped = val.replace('\n', ' ')
        edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
        run_option = st.selectbox("Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("⚙ AutoRun", value=True)
        with col2:
            full_audio = st.checkbox("📚FullAudio", value=False,
                                     help="Generate full audio response")

        input_changed = (val != st.session_state.get('old_val', None))
        if autorun and input_changed:
            st.session_state.old_val = val
            if run_option == "Arxiv":
                perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                                  titles_summary=True, full_audio=full_audio)
            elif run_option == "GPT-4o":
                process_with_gpt(edited_input)
            elif run_option == "Claude-3.5":
                process_with_claude(edited_input)
        else:
            if st.button("▶ Run"):
                st.session_state.old_val = val
                if run_option == "Arxiv":
                    perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                                      titles_summary=True, full_audio=full_audio)
                elif run_option == "GPT-4o":
                    process_with_gpt(edited_input)
                elif run_option == "Claude-3.5":
                    process_with_claude(edited_input)
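# main() below also calls several helpers that are not defined in this
# section. The sketches here keep the module self-contained; the vision
# payload shape, the frame-sampling approach, and the timestamp-based file
# grouping are assumptions rather than the original implementations.
def process_image(image_path, prompt):
    """Describe an image with the OpenAI vision model configured above."""
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    response = openai_client.chat.completions.create(
        model=st.session_state['openai_model'],
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/png;base64,{b64}"}},
            ],
        }],
        max_tokens=500,
    )
    return response.choices[0].message.content

def process_video_with_gpt(video_path, prompt):
    """Sketch: sample the first frame with OpenCV and describe it as an image."""
    cap = cv2.VideoCapture(video_path)
    ok, frame = cap.read()
    cap.release()
    if not ok:
        return "Could not read video."
    frame_path = f"frame_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
    cv2.imwrite(frame_path, frame)
    return process_image(frame_path, prompt)

def save_full_transcript(query, text):
    """Save a query and its result as a timestamped markdown transcript."""
    fname = f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(f"# Query: {query}\n\n{text}")

def get_download_link(file_path):
    """Return a base64 data-URI download link for an arbitrary file."""
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    fname = os.path.basename(file_path)
    return (f'<a href="data:application/octet-stream;base64,{b64}" '
            f'download="{fname}">⬇️ {fname}</a>')

def load_files_for_sidebar():
    """Group saved .md/.mp3 files by an assumed YYYYMMDD_HHMMSS stamp."""
    groups = defaultdict(list)
    for f in glob.glob("*.md") + glob.glob("*.mp3"):
        m = re.search(r"(\d{8}_\d{6})", os.path.basename(f))
        groups[m.group(1) if m else "misc"].append(f)
    sorted_prefixes = sorted(groups.keys(), reverse=True)
    return groups, sorted_prefixes

def display_file_manager_sidebar(groups, sorted_prefixes):
    """List file groups in the sidebar with a view button per group."""
    st.sidebar.markdown("### 📂 Files")
    for prefix in sorted_prefixes:
        if st.sidebar.button(f"📁 {prefix} ({len(groups[prefix])})", key=f"view_{prefix}"):
            st.session_state.viewing_prefix = prefix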
def main():
    st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research")
    tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
                        horizontal=True)

    if tab_main == "🎤 Voice":
        render_search_interface()

    elif tab_main == "🔍 ArXiv":
        st.subheader("🔍 Query ArXiv")
        q = st.text_input("🔍 Query:")
        st.markdown("### 🎛 Options")
        vocal_summary = st.checkbox("🎙ShortAudio", value=True)
        extended_refs = st.checkbox("📜LongRefs", value=False)
        titles_summary = st.checkbox("🔖TitlesOnly", value=True)
        full_audio = st.checkbox("📚FullAudio", value=False, help="Full audio of results")
        full_transcript = st.checkbox("🧾FullTranscript", value=False,
                                      help="Generate a full transcript file")
        if q and st.button("🔍Run"):
            result = perform_ai_lookup(q, vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                save_full_transcript(q, result)

        st.markdown("### Change Prompt & Re-Run")
        q_new = st.text_input("🔄 Modify Query:")
        if q_new and st.button("🔄 Re-Run with Modified Query"):
            result = perform_ai_lookup(q_new, vocal_summary=vocal_summary,
                                       extended_refs=extended_refs,
                                       titles_summary=titles_summary,
                                       full_audio=full_audio)
            if full_transcript:
                save_full_transcript(q_new, result)

    elif tab_main == "📸 Media":
        st.header("📸 Images & 🎥 Videos")
        tabs = st.tabs(["🖼 Images", "🎥 Video"])
        with tabs[0]:
            imgs = glob.glob("*.png") + glob.glob("*.jpg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3)
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        st.image(Image.open(f), use_container_width=True)
                        if st.button(f"👀 Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
                            a = process_image(f, "Describe this image.")
                            st.markdown(a)
            else:
                st.write("No images found.")
        with tabs[1]:
            vids = glob.glob("*.mp4")
            if vids:
                for v in vids:
                    with st.expander(f"🎥 {os.path.basename(v)}"):
                        st.video(v)
                        if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
                            a = process_video_with_gpt(v, "Describe video.")
                            st.markdown(a)
            else:
                st.write("No videos found.")

    elif tab_main == "📝 Editor":
        if getattr(st.session_state, 'current_file', None):
            st.subheader(f"Editing: {st.session_state.current_file}")
            new_text = st.text_area("✏️ Content:", st.session_state.file_content, height=300)
            if st.button("💾 Save"):
                with open(st.session_state.current_file, 'w', encoding='utf-8') as f:
                    f.write(new_text)
                st.success("Updated!")
                st.session_state.should_rerun = True
        else:
            st.write("Select a file from the sidebar to edit.")

    groups, sorted_prefixes = load_files_for_sidebar()
    display_file_manager_sidebar(groups, sorted_prefixes)

    if st.session_state.viewing_prefix and st.session_state.viewing_prefix in groups:
        st.write("---")
        st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
        for f in groups[st.session_state.viewing_prefix]:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            st.write(f"### {fname}")
            if ext == "md":
                content = open(f, 'r', encoding='utf-8').read()
                st.markdown(content)
            elif ext == "mp3":
                st.audio(f)
            else:
                st.markdown(get_download_link(f), unsafe_allow_html=True)
        if st.button("❌ Close"):
            st.session_state.viewing_prefix = None

    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()

if __name__ == "__main__":
    main()