import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict, Counter
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from openai import OpenAI
import asyncio
import edge_tts
from streamlit_marquee import streamlit_marquee

st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide"
)

EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural"
]

FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊",
    "txt": "📄",
    "pdf": "📑"
}

# Initialize session states
if 'tts_voice' not in st.session_state:
    st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
if 'audio_format' not in st.session_state:
    st.session_state['audio_format'] = 'mp3'
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
if 'viewing_prefix' not in st.session_state:
    st.session_state['viewing_prefix'] = None
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False

# Load API keys from a local .env file before reading them
load_dotenv()

# API Setup
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

@st.cache_resource
def get_cached_audio_b64(file_path):
    # Cache the base64-encoded audio so loop playback doesn't re-read the file
    with open(file_path, "rb") as f:
        return base64.b64encode(f.read()).decode()

def beautify_filename(filename):
    name = os.path.splitext(filename)[0]
    return name.replace('_', ' ').replace('.', ' ')

def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None
    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for f in all_files:
            z.write(f)
    return zip_name

def get_download_link(file_path, file_type="zip"):
    # Build an HTML anchor with the file embedded as a base64 data URI,
    # rendered via st.markdown(..., unsafe_allow_html=True)
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}
    emoji = ext_map.get(file_type, '')
    basename = os.path.basename(file_path)
    return (f'<a href="data:application/octet-stream;base64,{b64}" '
            f'download="{basename}">{emoji} Download {basename}</a>')

def load_files_for_sidebar():
    files = [f for f in glob.glob("*.*") if not f.lower().endswith('readme.md')]
    groups = defaultdict(list)
    for f in files:
        basename = os.path.basename(f)
        group_name = basename[:9] if len(basename) >= 9 else 'Other'
        groups[group_name].append(f)
    # Newest group (by most recently modified member file) first
    return sorted(groups.items(),
                  key=lambda x: max(os.path.getmtime(f) for f in x[1]),
                  reverse=True)

def display_marquee_controls():
    st.sidebar.markdown("### 🎯 Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        bg_color = st.color_picker("🎨 Background", "#1E1E1E")
        text_color = st.color_picker("✍️ Text", "#FFFFFF")
    with cols[1]:
        font_size = st.slider("📏 Size", 10, 24, 14)
        duration = st.slider("⏱️ Speed", 1, 20, 10)
    return {
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s",
        "width": "100%",
        "lineHeight": "35px"
    }

def display_file_manager_sidebar(groups_sorted):
    st.sidebar.title("📚 File Manager")
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)

    cols = st.sidebar.columns(4)
    for i, (ext, files) in enumerate(all_files.items()):
        with cols[i]:
st.button(f"πŸ—‘οΈ {ext.upper()}"): [os.remove(f) for f in files] st.session_state.should_rerun = True if st.sidebar.button("πŸ“¦ Zip All"): zip_name = create_zip_of_files( all_files['md'], all_files['mp3'], all_files['wav'] ) if zip_name: st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True) for group_name, files in groups_sorted: timestamp = (datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M") if len(group_name) == 9 else group_name) with st.sidebar.expander(f"πŸ“ {timestamp} ({len(files)})", expanded=True): c1, c2 = st.columns(2) with c1: if st.button("πŸ‘€", key=f"view_{group_name}"): st.session_state.viewing_prefix = group_name with c2: if st.button("πŸ—‘οΈ", key=f"del_{group_name}"): [os.remove(f) for f in files] st.session_state.should_rerun = True for f in files: ext = os.path.splitext(f)[1].lower().strip('.') emoji = FILE_EMOJIS.get(ext, 'πŸ“„') pretty_name = beautify_filename(os.path.basename(f)) st.write(f"{emoji} **{pretty_name}**") if ext in ['mp3', 'wav']: st.audio(f) if st.button("πŸ”„", key=f"loop_{f}"): audio_b64 = get_cached_audio_b64(f) st.components.v1.html( f''' ''', height=0 ) async def edge_tts_generate(text, voice, file_format="mp3"): text = re.sub(r'\s+', ' ', text).strip() if not text: return None communicate = edge_tts.Communicate(text, voice) filename = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}" await communicate.save(filename) return filename def parse_arxiv_refs(text): papers = [] current_paper = None for line in text.split('\n'): if '|' in line: if current_paper: papers.append(current_paper) parts = line.strip('* ').split('|') current_paper = { 'date': parts[0].strip(), 'title': parts[1].strip(), 'authors': '', 'summary': '', 'id': re.search(r'(\d{4}\.\d{5})', line).group(1) if re.search(r'(\d{4}\.\d{5})', line) else '' } elif current_paper: if not current_paper['authors']: current_paper['authors'] = line.strip('* ') else: current_paper['summary'] += ' ' + line.strip() if current_paper: papers.append(current_paper) return papers def perform_ai_lookup(query): client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern") response = client.predict( query, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md" ) papers = parse_arxiv_refs(response[0]) marquee_settings = display_marquee_controls() for paper in papers: content = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors']} | πŸ“ {paper['summary']}" streamlit_marquee( content=content, **marquee_settings, key=f"paper_{paper['id'] or random.randint(1000,9999)}" ) st.write("") # Spacing return papers def main(): marquee_settings = display_marquee_controls() streamlit_marquee( content="πŸš€ Welcome to TalkingAIResearcher | πŸ€– Your Research Assistant", **marquee_settings, key="welcome" ) tab = st.radio("Action:", ["🎀 Voice", "πŸ” ArXiv", "πŸ“ Editor"], horizontal=True) if tab == "πŸ” ArXiv": query = st.text_input("πŸ” Search:") if query: papers = perform_ai_lookup(query) st.write(f"Found {len(papers)} papers") groups = load_files_for_sidebar() display_file_manager_sidebar(groups) if st.session_state.should_rerun: st.session_state.should_rerun = False st.rerun() # Condensed sidebar markdown sidebar_md = """# πŸ“š Research Papers ## 🧠 AGI Levels L0 ❌ No AI L1 🌱 ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf) L2 πŸ’ͺ Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf) L3 🎯 DALLΒ·E 
L3 🎯 DALL·E [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 🏆 AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 🚀 AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## 🧬 AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. 🧬 Input Seq → 2. 🔍 DB Search → 3. 🧩 MSA
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
7. 🎯 3D Predict → 8. ♻️ Recycle x3"""
    st.sidebar.markdown(sidebar_md)

if __name__ == "__main__":
    main()