import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts

# --- Configuration & Setup ---
st.set_page_config(
    page_title="BikeAI Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "BikeAI Claude/GPT Research AI"
    }
)
load_dotenv()

# --- API Setup & Clients ---
def init_api_clients():
    # Pull keys from the environment first, then let Streamlit secrets override them.
    api_keys = {k: os.getenv(k, "") for k in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY_3', 'xai', 'HF_KEY', 'API_URL')}
    try:
        api_keys.update({k: v for k, v in st.secrets.items() if k in api_keys})
    except FileNotFoundError:
        pass  # no secrets.toml present; fall back to environment variables only
    openai.api_key = api_keys['OPENAI_API_KEY']
    return {
        'claude': anthropic.Anthropic(api_key=api_keys['ANTHROPIC_API_KEY_3']),
        'openai': OpenAI(api_key=openai.api_key)
    }

api_clients = init_api_clients()

# --- Session State Management ---
def initialize_session_state():
    defaults = {
        'transcript_history': [],
        'chat_history': [],
        'openai_model': "gpt-4o-2024-05-13",
        'messages': [],
        'last_voice_input': "",
        'editing_file': None,
        'edit_new_name': "",
        'edit_new_content': "",
        'viewing_prefix': None,
        'should_rerun': False,
        'old_val': None
    }
    for k, v in defaults.items():
        if k not in st.session_state:
            st.session_state[k] = v

initialize_session_state()

# --- Custom CSS ---
st.markdown("""
<style>
    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
    .stButton>button { margin-right: 0.5rem; }
</style>
""", unsafe_allow_html=True)

# --- Helper Functions ---
def get_high_info_terms(text: str) -> list:
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'in', 'on', 'at', 'to', 'for', 'with'])
    key_phrases = ['artificial intelligence', 'machine learning', 'neural network']
    preserved = [p for p in key_phrases if p in text.lower()]
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    high_info_words = [w for w in words if w not in stop_words and len(w) > 3]
    return list(dict.fromkeys(preserved + high_info_words))[:5]

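# Illustrative behavior (traced from the function above, not from the original source):
#   get_high_info_terms("An intro to machine learning and neural networks")
#   -> ['machine learning', 'neural network', 'intro', 'machine', 'learning']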

def generate_filename(prompt, response, file_type="md"):
    prefix = datetime.now().strftime("%y%m_%H%M")
    info_terms = get_high_info_terms(prompt + response)
    snippet = '_'.join(info_terms)
    return f"{prefix}_{snippet[:150]}.{file_type}"

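# Filenames are "<YYMM_HHMM>_<up to 150 chars of high-info terms joined by '_'>.<ext>".
# Note that preserved key phrases (e.g. "machine learning") keep their internal spaces.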

def create_file(prompt, response, file_type="md"):
    filename = generate_filename(prompt, response, file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{prompt}\n\n{response}")
    return filename


def play_and_download_audio(file_path):
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        st.markdown(
            f'<a href="data:audio/mpeg;base64,{b64}" download="{file_path}">Download {file_path}</a>',
            unsafe_allow_html=True
        )


async def edge_tts_generate_audio(text, voice="en-US-AriaNeural"):
    out_fn = generate_filename(text, text, "mp3")
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(out_fn)
    return out_fn


# --- ArXiv Lookup ---
def perform_ai_lookup(query, full_audio=False):
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    result = client.predict(query, api_name="/ask_llm")
    st.markdown(f"### 🔎 {query}\n{result}")
    if full_audio:
        audio_file = asyncio.run(edge_tts_generate_audio(result))
        play_and_download_audio(audio_file)
    create_file(query, result)


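# --- Chat (GPT) ---
# main() below calls process_with_gpt(), which is not defined anywhere in this file.
# The function below is a minimal sketch, assuming the OpenAI client created by
# init_api_clients() and the session-state keys set in initialize_session_state();
# the real implementation may differ.
def process_with_gpt(text):
    if not text:
        return None
    st.session_state.messages.append({"role": "user", "content": text})
    completion = api_clients['openai'].chat.completions.create(
        model=st.session_state.openai_model,
        messages=st.session_state.messages,
    )
    reply = completion.choices[0].message.content
    st.session_state.messages.append({"role": "assistant", "content": reply})
    st.markdown(reply)
    create_file(text, reply)  # persist the exchange, mirroring perform_ai_lookup()
    return reply
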
# --- Main App ---
def main():
    st.sidebar.title("📂 File Manager")
    action = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv"])
    if action == "🔍 ArXiv":
        query = st.text_input("Query")
        if st.button("Run") and query:
            perform_ai_lookup(query)

    elif action == "🎤 Voice":
        text = st.text_area("Message")
        if st.button("Send") and text:
            process_with_gpt(text)


if __name__ == "__main__":
    main()