# Hugging Face Spaces page residue (scraped UI text, not code):
# this Space runs on "CPU Upgrade" hardware.
# --- Imports ---
# Standard library
import asyncio
import base64
import glob
import json
import math
import os
import random
import re
import time
import zipfile
from collections import defaultdict
from datetime import datetime
from io import BytesIO
from urllib.parse import quote
from xml.etree import ElementTree as ET

# Third-party
import anthropic
import cv2
import edge_tts
import extra_streamlit_components as stx
import openai
import plotly.graph_objects as go
import pytz
import requests
import streamlit as st
import streamlit.components.v1 as components
import textract
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from openai import OpenAI
from PIL import Image
from PyPDF2 import PdfReader
from streamlit.runtime.scriptrunner import get_script_run_ctx
# --- Configuration & Setup ---
# Streamlit page chrome: title, icon, layout, and the help/bug/about menu links.
_MENU_ITEMS = {
    'Get Help': 'https://huggingface.co/awacke1',
    'Report a bug': 'https://huggingface.co/spaces/awacke1',
    'About': "BikeAI Claude/GPT Research AI",
}
st.set_page_config(
    page_title="BikeAI Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=_MENU_ITEMS,
)

# Pull environment variables (API keys, endpoints) from a local .env file.
load_dotenv()
# --- API Setup & Clients ---
def init_api_clients():
    """Build the Anthropic and OpenAI API clients.

    Keys are read from the environment first (populated via .env by
    load_dotenv), then overridden by any matching Streamlit secrets.
    Also sets the module-level ``openai.api_key`` as a side effect.

    Returns:
        dict: {'claude': anthropic.Anthropic client, 'openai': OpenAI client}
    """
    key_names = ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY_3', 'xai', 'HF_KEY', 'API_URL')
    api_keys = {k: os.getenv(k, "") for k in key_names}
    # st.secrets raises FileNotFoundError when no secrets.toml exists
    # (e.g. running outside Spaces); treat that as "no overrides"
    # instead of crashing at module import time.
    try:
        api_keys.update({k: v for k, v in st.secrets.items() if k in api_keys})
    except FileNotFoundError:
        pass
    openai.api_key = api_keys['OPENAI_API_KEY']
    return {
        'claude': anthropic.Anthropic(api_key=api_keys['ANTHROPIC_API_KEY_3']),
        'openai': OpenAI(api_key=openai.api_key),
    }


api_clients = init_api_clients()
# --- Session State Management ---
def initialize_session_state():
    """Seed st.session_state with defaults, leaving any existing keys untouched."""
    defaults = {
        'transcript_history': [],
        'chat_history': [],
        'openai_model': "gpt-4o-2024-05-13",
        'messages': [],
        'last_voice_input': "",
        'editing_file': None,
        'edit_new_name': "",
        'edit_new_content': "",
        'viewing_prefix': None,
        'should_rerun': False,
        'old_val': None,
    }
    for key, default in defaults.items():
        # Only fill in keys that have not been set during this session.
        if key not in st.session_state:
            st.session_state[key] = default


initialize_session_state()
# --- Custom CSS ---
# Dark-gradient theme plus small typography/button tweaks, injected as raw HTML.
_CUSTOM_CSS = """
<style>
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
.stButton>button { margin-right: 0.5rem; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# --- Helper Functions ---
def get_high_info_terms(text: str) -> list:
    """Extract up to 5 high-information terms from *text*.

    Known multi-word key phrases are preserved first; then individual
    words longer than 3 characters that are not stop words are appended
    in order of first appearance. Words that are part of an already
    preserved phrase are skipped so the 5 result slots are not wasted
    on duplicates. Matching is case-insensitive; results are lowercase.
    """
    stop_words = {'the', 'a', 'an', 'and', 'or', 'in', 'on', 'at', 'to', 'for', 'with'}
    key_phrases = ['artificial intelligence', 'machine learning', 'neural network']
    lowered = text.lower()
    preserved = [p for p in key_phrases if p in lowered]
    # Component words of a preserved phrase would only duplicate it.
    phrase_words = {w for p in preserved for w in p.split()}
    words = re.findall(r'\b\w+(?:-\w+)*\b', lowered)
    high_info_words = [
        w for w in words
        if w not in stop_words and w not in phrase_words and len(w) > 3
    ]
    return list(dict.fromkeys(preserved + high_info_words))[:5]
def generate_filename(prompt, response, file_type="md"):
    """Build a timestamped filename from the most informative terms.

    Format: ``YYMM_HHMM_<terms>.<file_type>`` with the term snippet
    capped at 150 characters. Falls back to 'untitled' when no terms
    are extracted so the name never ends in a bare underscore.
    """
    prefix = datetime.now().strftime("%y%m_%H%M")
    info_terms = get_high_info_terms(prompt + response)
    snippet = '_'.join(info_terms) or 'untitled'
    return f"{prefix}_{snippet[:150]}.{file_type}"
def create_file(prompt, response, file_type="md"):
    """Persist a prompt/response pair to disk; return the filename used."""
    out_name = generate_filename(prompt, response, file_type)
    body = f"{prompt}\n\n{response}"
    with open(out_name, 'w', encoding='utf-8') as handle:
        handle.write(body)
    return out_name
def play_and_download_audio(file_path):
    """Render an audio player plus a base64 download link for *file_path*.

    Silently does nothing when the path is falsy or the file is missing.
    """
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        # Read via a context manager so the handle is closed promptly
        # (original left the file object open).
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        st.markdown(
            f'<a href="data:audio/mpeg;base64,{b64}" download="{file_path}">Download {file_path}</a>',
            unsafe_allow_html=True,
        )
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural"):
    """Synthesize *text* to an mp3 via Edge TTS; return the output filename."""
    output_path = generate_filename(text, text, "mp3")
    tts = edge_tts.Communicate(text, voice)
    await tts.save(output_path)
    return output_path
# --- ArXiv Lookup ---
def perform_ai_lookup(query, full_audio=False):
    """Query the ArXiv RAG Space, render the answer, optionally read it aloud.

    The query/answer pair is always saved to a markdown file via create_file.
    """
    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    answer = rag_client.predict(query, api_name="/ask_llm")
    st.markdown(f"### 🔎 {query}\n{answer}")
    if full_audio:
        mp3_path = asyncio.run(edge_tts_generate_audio(answer))
        play_and_download_audio(mp3_path)
    create_file(query, answer)
# --- Main App ---
def main():
    """Top-level UI: sidebar header plus a Voice/ArXiv action picker."""
    st.sidebar.title("📂 File Manager")
    action = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv"])
    if action == "🔍 ArXiv":
        query = st.text_input("Query")
        # Guard: don't fire an empty query at the remote Space.
        if st.button("Run") and query.strip():
            perform_ai_lookup(query)
    elif action == "🎤 Voice":
        text = st.text_area("Message")
        # NOTE(review): process_with_gpt is not defined in this chunk —
        # presumably elsewhere in the file; confirm before refactoring.
        if st.button("Send") and text.strip():
            process_with_gpt(text)


if __name__ == "__main__":
    main()