DeepResearchEvaluator

Running

File size: 9,576 Bytes

3cbe462
57a1ea8
3cbe462
263adfe
1c134dd
8228332
3cbe462
263adfe
8228332
1c134dd
263adfe
 
3cbe462
1c134dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac9fced
263adfe
57a1ea8
 
 
263adfe
57a1ea8
 
ac9fced
215ae70
ac9fced
1c134dd
 
 
 
 
 
 
 
 
 
215ae70
ac9fced
73c62c4
 
215ae70
1c134dd
57a1ea8
1c134dd
57a1ea8
215ae70
ac9fced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c134dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215ae70
57a1ea8
ac9fced
 
57a1ea8
215ae70
ac9fced
 
 
57a1ea8
 
ac9fced
 
 
 
 
1c134dd
ac9fced
1c134dd
ac9fced
 
 
215ae70
1c134dd
 
 
 
 
ac9fced
57a1ea8
3cbe462
57a1ea8
215ae70
3cbe462
57a1ea8
 
3cbe462
 
215ae70
57a1ea8
 
 
 
 
 
 
ac9fced
 
 
1c134dd
ac9fced
1c134dd
 
ac9fced
1c134dd
ac9fced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c134dd
ac9fced
 
3cbe462
 
ac9fced
 
 
 
 
 
72e2bb4
ac9fced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62643c9
ac9fced
 
57a1ea8
ac9fced
57a1ea8
ac9fced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57a1ea8

import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from streamlit_marquee import streamlit_marquee
import asyncio
import edge_tts

st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
)

# Voice identifiers offered for speech synthesis; the first one seeds the
# 'tts_voice' session default below.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
]
# Emoji shown next to a file in the sidebar, keyed by extension without the dot.
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊",
    "txt": "📄",
    "pdf": "📑",
}

# Seed st.session_state once; keys that already exist (from an earlier run of
# the script in the same session) keep their current value.
for state_key, initial_value in (
    ('tts_voice', EDGE_TTS_VOICES[0]),
    ('audio_format', 'mp3'),
    ('messages', []),
    ('chat_history', []),
    ('transcript_history', []),
    ('viewing_prefix', None),
    ('should_rerun', False),
):
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = initial_value

@st.cache_resource
def get_cached_audio_b64(file_path):
    """Return the contents of *file_path* as a base64-encoded ASCII string.

    The result is memoised by Streamlit's resource cache, keyed on the
    ``file_path`` argument.
    """
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()

def beautify_filename(filename):
    """Return *filename* without its extension, with '_' and '.' shown as spaces."""
    stem, _extension = os.path.splitext(filename)
    # One pass over the stem instead of two chained replace() calls.
    return stem.translate(str.maketrans({'_': ' ', '.': ' '}))

def load_files_for_sidebar():
    """Collect files from the working directory and group them by name prefix.

    Every path matching ``*.*`` is kept, except ``.md`` files whose name does
    not start with a ``dddd_dddd`` prefix. Files are grouped by the first nine
    characters of their basename (``'Other'`` when the name is shorter).

    Returns:
        A list of ``(group_name, [paths])`` tuples, ordered so the group that
        contains the most recently modified file comes first.
    """
    def _is_listed(path):
        # Non-markdown files are always shown; markdown needs the prefix.
        if not path.endswith('.md'):
            return True
        name = os.path.basename(path)
        return len(name) >= 9 and re.match(r'\d{4}_\d{4}', name[:9]) is not None

    def _newest_mtime(group_item):
        _, members = group_item
        return max(os.path.getmtime(member) for member in members)

    grouped = defaultdict(list)
    for path in glob.glob("*.*"):
        if not _is_listed(path):
            continue
        name = os.path.basename(path)
        prefix = name[:9] if len(name) >= 9 else 'Other'
        grouped[prefix].append(path)

    return sorted(grouped.items(), key=_newest_mtime, reverse=True)

def display_marquee_controls():
    """Render the marquee styling widgets in the sidebar and return the chosen style.

    Returns:
        A dict with the keys ``background``, ``color``, ``font-size``,
        ``animationDuration``, ``width`` and ``lineHeight``; callers in this
        file unpack it into ``streamlit_marquee``.
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    color_column, sizing_column = st.sidebar.columns(2)

    with color_column:
        background = st.color_picker("🎨 Background", "#1E1E1E")
        foreground = st.color_picker("✍️ Text", "#FFFFFF")

    with sizing_column:
        size_px = st.slider("📏 Size", 10, 24, 14)
        seconds = st.slider("⏱️ Speed", 1, 20, 10)

    settings = {
        "background": background,
        "color": foreground,
        "font-size": f"{size_px}px",
        "animationDuration": f"{seconds}s",
    }
    # Fixed layout values that are not user-adjustable.
    settings["width"] = "100%"
    settings["lineHeight"] = "35px"
    return settings

def get_download_link(file_path, file_type="zip"):
    """Build an HTML anchor that downloads *file_path* from an inline data URI.

    Args:
        file_path: Path of the file to embed; it is read fully into memory.
        file_type: Used as the ``application/<file_type>`` media subtype and to
            pick the emoji prefix (``zip``, ``mp3``, ``wav`` or ``md``; any
            other value gets no emoji).

    Returns:
        The ``<a>`` element as a string. The file name is HTML-escaped because
        callers render the result with ``unsafe_allow_html=True``, so a name
        containing quotes, ``&`` or angle brackets must not break the markup.

    Raises:
        OSError: If *file_path* cannot be opened or read.
    """
    import html  # local import: only this function needs it

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}
    emoji = ext_map.get(file_type, '')
    # Escaped once and reused for both the attribute value and the link text.
    safe_name = html.escape(os.path.basename(file_path))
    return f'<a href="data:application/{file_type};base64,{b64}" download="{safe_name}">{emoji} Download {safe_name}</a>'

def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files, mp3_files, wav_files: Lists of paths; they are concatenated
            in this order and written to the archive under the same names.
        query: Accepted for interface compatibility; not used here.

    Returns:
        The archive file name (``<yymm_HHMM>_archive.zip``), or ``None`` when
        all three lists are empty.
    """
    members = md_files + mp3_files + wav_files
    if not members:
        return None

    stamp = datetime.now().strftime("%y%m_%H%M")
    archive_name = f"{stamp}_archive.zip"
    with zipfile.ZipFile(archive_name, mode='w') as archive:
        for member in members:
            archive.write(member)
    return archive_name

def _remove_files(paths):
    """Delete every path in *paths*, skipping files that are already gone."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # The bulk-delete and per-group delete buttons can both list the
            # same file within one run; a second removal is a no-op.
            pass


def display_file_manager_sidebar(groups_sorted):
    """Render the sidebar file manager: bulk actions plus one expander per group.

    Args:
        groups_sorted: Iterable of ``(group_name, [paths])`` pairs, as returned
            by ``load_files_for_sidebar()``.

    Side effects:
        Deletes files from disk when a delete button is clicked and sets
        ``st.session_state.should_rerun``; sets
        ``st.session_state.viewing_prefix`` when a view button is clicked;
        writes a zip archive when "Zip All" is clicked.
    """
    st.sidebar.title("📚 File Manager")
    all_files = {'md': [], 'mp3': [], 'wav': []}

    # Bucket every md/mp3/wav file by extension for the bulk actions.
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)

    cols = st.sidebar.columns(4)
    for i, (ext, files) in enumerate(all_files.items()):
        with cols[i]:
            if st.button(f"🗑️ {ext.upper()}"):
                _remove_files(files)
                st.session_state.should_rerun = True

    if st.sidebar.button("📦 Zip All"):
        zip_name = create_zip_of_files(all_files['md'], all_files['mp3'], all_files['wav'])
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)

    for group_name, files in groups_sorted:
        # Group names that look like a "yymm_HHMM" prefix get a readable
        # label; anything else (including 'Other') is shown verbatim.
        # NOTE(review): the prefix carries no day, so strptime fills in day 01.
        timestamp = group_name
        if len(group_name) == 9:
            try:
                timestamp = datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
            except ValueError:
                timestamp = group_name

        with st.sidebar.expander(f"📁 {timestamp} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("👀", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("🗑️", key=f"del_{group_name}"):
                    _remove_files(files)
                    st.session_state.should_rerun = True

            for f in files:
                # A delete button handled earlier in this same run may have
                # removed the file; rendering it (st.audio reads the path)
                # would fail before the pending rerun refreshes the list.
                if not os.path.exists(f):
                    continue

                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '📄')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")

                if ext in ['mp3', 'wav']:
                    st.audio(f)
                    if st.button("🔄", key=f"loop_{f}"):
                        # Embed the audio inline and start looping playback
                        # from a zero-height HTML component.
                        audio_b64 = get_cached_audio_b64(f)
                        st.components.v1.html(
                            f'''<audio id="player_{f}" loop>
                                <source src="data:audio/{ext};base64,{audio_b64}">
                               </audio>
                               <script>
                                document.getElementById("player_{f}").play();
                               </script>''',
                            height=0
                        )

async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize *text* with edge-tts and save it to a timestamped file.

    Runs of whitespace in *text* are collapsed to single spaces first.

    Args:
        text: Text to speak.
        voice: Voice name passed to ``edge_tts.Communicate``; also embedded in
            the output file name.
        file_format: Extension used for the output file name.

    Returns:
        The file name ``<yymm_HHMM>_<voice>.<file_format>``, or ``None`` when
        *text* is empty after whitespace normalisation.
    """
    normalized = re.sub(r'\s+', ' ', text).strip()
    if normalized == "":
        return None

    speaker = edge_tts.Communicate(normalized, voice)
    stamp = datetime.now().strftime('%y%m_%H%M')
    output_name = f"{stamp}_{voice}.{file_format}"
    await speaker.save(output_name)
    return output_name

def parse_arxiv_refs(text):
    """Parse a line-oriented reference listing into a list of paper dicts.

    A line containing ``|`` starts a new paper: its first ``|``-separated
    field is the date and the second the title (surrounding ``*`` and spaces
    are stripped). The first following non-empty line without ``|`` becomes
    the authors; every later line is appended to the summary, separated by
    single spaces. Lines before the first ``|`` line are ignored.

    Args:
        text: The raw listing.

    Returns:
        A list of dicts with the keys ``date``, ``title``, ``authors``,
        ``summary`` and ``id``. ``id`` is the first arXiv-style identifier
        found on the header line, or ``''`` when there is none.
    """
    # arXiv identifiers have a 4-digit sequence number through 2014
    # (e.g. 1412.6980) and a 5-digit one since 2015 (e.g. 1706.03762).
    arxiv_id_pattern = re.compile(r'(\d{4}\.\d{4,5})')

    papers = []
    current_paper = None

    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                papers.append(current_paper)
            parts = line.strip('* ').split('|')
            id_match = arxiv_id_pattern.search(line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
        elif current_paper:
            if not current_paper['authors']:
                # Stays empty for blank lines, so the next non-empty line is
                # still treated as the authors line.
                current_paper['authors'] = line.strip().strip('* ')
            else:
                fragment = line.strip()
                if fragment:
                    # Join with exactly one space; blank lines add nothing.
                    current_paper['summary'] = f"{current_paper['summary']} {fragment}".strip()

    if current_paper:
        papers.append(current_paper)
    return papers

def perform_ai_lookup(query, marquee_settings=None):
    """Query the hosted arXiv RAG Space and show each returned paper as a marquee.

    Args:
        query: Search text forwarded to the remote ``/update_with_rag_md``
            endpoint.
        marquee_settings: Optional style dict (as returned by
            ``display_marquee_controls()``) to reuse. When omitted, the
            controls are rendered here to obtain one. Callers that have
            already rendered the controls in the current run should pass
            their dict, because rendering the same key-less widgets twice in
            one run triggers Streamlit's duplicate-widget error.

    Returns:
        The list of paper dicts parsed from the first element of the response.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )

    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        marquee_settings = display_marquee_controls()

    for index, paper in enumerate(papers):
        content = f"📄 {paper['title']} | 👤 {paper['authors']} | 📝 {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            # The position makes the key unique even when an id is missing or
            # repeated, and keeps it stable across reruns (a random suffix
            # would change on every run and could collide).
            key=f"paper_{index}_{paper['id']}"
        )
        st.write("")

    return papers


def main():
    """Render the page: welcome marquee, action selector, search, file sidebar."""
    marquee_settings = display_marquee_controls()

    streamlit_marquee(
        content="🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )

    tab = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv", "📝 Editor"], horizontal=True)

    if tab == "🔍 ArXiv":
        query = st.text_input("🔍 Search:")
        if query:
            # Reuse the settings gathered above so the sidebar controls are
            # only created once per run.
            papers = perform_ai_lookup(query, marquee_settings=marquee_settings)
            st.write(f"Found {len(papers)} papers")

    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)

    # The file manager requests a rerun after deleting files so the listing
    # is rebuilt; clear the flag first so the next run does not loop.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()

# Static markdown reference card shown in the sidebar: example systems per
# "AGI level" with arXiv abstract/PDF links, followed by a numbered outline
# of AlphaFold2 pipeline steps.
# NOTE(review): the AlphaFold entries link to arXiv 2203.15556 and the Watson
# entry to 2201.11903 — confirm these ids point at the intended papers.
sidebar_md = """# 📚 Research Papers

## 🧠 AGI Levels
L0 ❌ No AI
L1 🌱 ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 💪 Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 🎯 DALL·E [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 🏆 AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf) 
L5 🚀 AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)

## 🧬 AlphaFold2 
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. 🧬 Input Seq → 2. 🔍 DB Search → 3. 🧩 MSA
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
7. 🎯 3D Predict → 8. ♻️ Recycle x3"""

# Executed at module level, so it runs before main() is called below and
# before main() adds its own sidebar widgets.
st.sidebar.markdown(sidebar_md)

# Entry point: build the page when the file is executed as a script.
if __name__ == "__main__":
    main()