import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts

# --- Configuration & Setup ---
st.set_page_config(
    page_title="BikeAI Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "BikeAI Claude/GPT Research AI"
    }
)
load_dotenv()

# --- API Setup & Clients ---
def init_api_clients():
    # Pull keys from the environment first, then let Streamlit secrets override them.
    api_keys = {k: os.getenv(k, "") for k in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY_3', 'xai', 'HF_KEY', 'API_URL')}
    try:
        api_keys.update({k: v for k, v in st.secrets.items() if k in api_keys})
    except FileNotFoundError:
        pass  # no secrets.toml present; fall back to environment variables only
    openai.api_key = api_keys['OPENAI_API_KEY']
    return {
        'claude': anthropic.Anthropic(api_key=api_keys['ANTHROPIC_API_KEY_3']),
        'openai': OpenAI(api_key=openai.api_key)
    }

api_clients = init_api_clients()

# --- Session State Management ---
def initialize_session_state():
    defaults = {
        'transcript_history': [],
        'chat_history': [],
        'openai_model': "gpt-4o-2024-05-13",
        'messages': [],
        'last_voice_input': "",
        'editing_file': None,
        'edit_new_name': "",
        'edit_new_content': "",
        'viewing_prefix': None,
        'should_rerun': False,
        'old_val': None
    }
    for k, v in defaults.items():
        if k not in st.session_state:
            st.session_state[k] = v

initialize_session_state()

# --- Custom CSS ---
st.markdown("""
<style>
    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
    .stButton>button { margin-right: 0.5rem; }
</style>
""", unsafe_allow_html=True)

# --- Helper Functions ---
def get_high_info_terms(text: str) -> list:
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'in', 'on', 'at', 'to', 'for', 'with'])
    key_phrases = ['artificial intelligence', 'machine learning', 'neural network']
    preserved = [p for p in key_phrases if p in text.lower()]
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    high_info_words = [w for w in words if w not in stop_words and len(w) > 3]
    return list(dict.fromkeys(preserved + high_info_words))[:5]

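# Illustrative behavior (traced from the function above, not from the original source):
#   get_high_info_terms("An intro to machine learning and neural networks")
#   -> ['machine learning', 'neural network', 'intro', 'machine', 'learning']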

def generate_filename(prompt, response, file_type="md"):
    prefix = datetime.now().strftime("%y%m_%H%M")
    info_terms = get_high_info_terms(prompt + response)
    snippet = '_'.join(info_terms)
    return f"{prefix}_{snippet[:150]}.{file_type}"

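# Filenames are "<YYMM_HHMM>_<up to 150 chars of high-info terms joined by '_'>.<ext>".
# Note that preserved key phrases (e.g. "machine learning") keep their internal spaces.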

def create_file(prompt, response, file_type="md"):
    filename = generate_filename(prompt, response, file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{prompt}\n\n{response}")
    return filename


def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        st.markdown(
            f'<a href="data:audio/mpeg;base64,{b64}" download="{file_path}">Download {file_path}</a>',
            unsafe_allow_html=True
        )


async def edge_tts_generate_audio(text, voice="en-US-AriaNeural"):
    out_fn = generate_filename(text, text, "mp3")
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(out_fn)
    return out_fn


# --- ArXiv Lookup ---
def perform_ai_lookup(query, full_audio=False):
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    result = client.predict(query, api_name="/ask_llm")
    st.markdown(f"### 🔎 {query}\n{result}")
    if full_audio:
        audio_file = asyncio.run(edge_tts_generate_audio(result))
        play_and_download_audio(audio_file)
    create_file(query, result)


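# --- Chat (GPT) ---
# main() below calls process_with_gpt(), which is not defined anywhere in this file.
# The function below is a minimal sketch, assuming the OpenAI client created by
# init_api_clients() and the session-state keys set in initialize_session_state();
# the real implementation may differ.
def process_with_gpt(text):
    if not text:
        return None
    st.session_state.messages.append({"role": "user", "content": text})
    completion = api_clients['openai'].chat.completions.create(
        model=st.session_state.openai_model,
        messages=st.session_state.messages,
    )
    reply = completion.choices[0].message.content
    st.session_state.messages.append({"role": "assistant", "content": reply})
    st.markdown(reply)
    create_file(text, reply)  # persist the exchange, mirroring perform_ai_lookup()
    return reply
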
# --- Main App ---
def main():
    st.sidebar.title("📂 File Manager")
    action = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv"])
    if action == "🔍 ArXiv":
        query = st.text_input("Query")
        if st.button("Run") and query:
            perform_ai_lookup(query)

    elif action == "🎤 Voice":
        text = st.text_area("Message")
        if st.button("Send") and text:
            process_with_gpt(text)


if __name__ == "__main__":
    main()