import streamlit as st
import anthropic
import openai
import base64
import cv2
import glob
import json
import math
import os
import pytz
import random
import re
import requests
import textract
import time
import zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict, deque
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts

# 1. Core Configuration & Setup
# Page configuration is issued before any other Streamlit command below.
st.set_page_config(
    page_title="๐ŸšฒBikeAI๐Ÿ† Claude/GPT Research",
    page_icon="๐Ÿšฒ๐Ÿ†",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "๐ŸšฒBikeAI๐Ÿ† Claude/GPT Research AI"
    }
)
load_dotenv()

# 2. API Setup & Clients
# Environment variables provide the initial values; matching entries in
# st.secrets, when present, take precedence.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(
    api_key=openai.api_key,
    organization=os.getenv('OPENAI_ORG_ID'),
)
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# 3. Session State Management
# Seed each key only when it is absent so values already stored in the
# session are left untouched.
for _state_key, _state_default in (
    ('transcript_history', []),
    ('chat_history', []),
    ('openai_model', "gpt-4o-2024-05-13"),
    ('messages', []),
    ('last_voice_input', ""),
    ('editing_file', None),
    ('edit_new_name', ""),
    ('edit_new_content', ""),
    ('viewing_prefix', None),
    ('should_rerun', False),
    ('old_val', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# 4. Custom CSS
st.markdown(""" """, unsafe_allow_html=True)

# Maps a file extension to the emoji shown for that file type.
FILE_EMOJIS = {
    "md": "๐Ÿ“",
    "mp3": "๐ŸŽต",
}

# 5.
# High-Information Content Extraction

# Common function words that are never reported as high-information terms.
_STOP_WORDS = frozenset([
    'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
    'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
    'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
    'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
    'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
    'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
    'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there',
])

# Multi-word phrases that are reported as a single term instead of being
# split into their individual words.
_KEY_PHRASES = (
    'artificial intelligence', 'machine learning', 'deep learning', 'neural network',
    'personal assistant', 'natural language', 'computer vision', 'data science',
    'reinforcement learning', 'knowledge graph', 'semantic search', 'time series',
    'large language model', 'transformer model', 'attention mechanism',
    'autonomous system', 'edge computing', 'quantum computing', 'blockchain technology',
    'cognitive science', 'human computer', 'decision making', 'arxiv search',
    'research paper', 'scientific study', 'empirical analysis',
)


def get_high_info_terms(text: str, max_terms: int = 5) -> list:
    """Extract high-information terms from text, including key phrases.

    Known key phrases are matched case-insensitively and listed first (in
    the order they appear in the phrase table), followed by individual
    words that are longer than three characters, are not stop words and
    contain at least one letter. Duplicates are dropped, keeping the first
    occurrence.

    Args:
        text: Free-form input text.
        max_terms: Maximum number of terms to return (default 5).

    Returns:
        A list of lower-cased terms, at most ``max_terms`` long.
    """
    lower_text = text.lower()
    preserved_phrases = []
    for phrase in _KEY_PHRASES:
        if phrase in lower_text:
            preserved_phrases.append(phrase)
            # The phrase was detected in the lower-cased text, so it must be
            # removed case-insensitively; otherwise "Machine Learning" would
            # be reported as a phrase *and* again as two separate words.
            # A space is substituted so neighbouring words never fuse.
            text = re.sub(re.escape(phrase), ' ', text, flags=re.IGNORECASE)

    # Words may contain internal hyphens (e.g. "state-of-the-art").
    words = re.findall(r'\b\w+(?:-\w+)*\b', text)
    high_info_words = [
        word.lower()
        for word in words
        if len(word) > 3
        and word.lower() not in _STOP_WORDS
        and not word.isdigit()
        and any(c.isalpha() for c in word)
    ]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_terms = list(dict.fromkeys(preserved_phrases + high_info_words))
    return unique_terms[:max_terms]


def clean_text_for_filename(text: str) -> str:
    """Remove punctuation and short filler words, return a compact string.

    The text is lower-cased, every character that is not a word character,
    whitespace or hyphen is removed, words of three characters or fewer and
    a small set of filler words are dropped, and the remainder is joined
    with underscores and truncated to 200 characters.
    """
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    stop_short = {
        'the', 'and', 'for', 'with', 'this', 'that', 'from', 'just',
        'very', 'then', 'been', 'only', 'also', 'about',
    }
    filtered = [w for w in text.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(filtered)[:200]


# 6. File Operations
def generate_filename(prompt, response, file_type="md"):
    """
    Generate filename with meaningful terms and short dense clips from prompt & response.

    The name is a timestamp prefix, the high-information terms of the
    combined prompt and response, and a cleaned snippet built from the first
    100 characters of each, joined by underscores. The part after the prefix
    is capped at 150 characters; the prefix and extension come on top.

    Args:
        prompt: Prompt text.
        response: Response text.
        file_type: File extension without the leading dot.

    Returns:
        The generated file name (no directory component).
    """
    # NOTE(review): the prefix is year+month then hour+minute, with no
    # day-of-month component -- confirm that is intended.
    prefix = datetime.now().strftime("%y%m_%H%M") + "_"
    combined = (prompt + " " + response).strip()
    info_terms = get_high_info_terms(combined)

    # Include a short snippet from prompt and response.
    snippet = (prompt[:100] + " " + response[:100]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # Combine info terms and snippet, then trim to 150 characters.
    full_name = '_'.join(info_terms + [snippet_cleaned])[:150]
    return f"{prefix}{full_name}.{file_type}"


def create_file(prompt, response, file_type="md"):
    """Create file with an intelligent naming scheme.

    The name is derived from the stripped prompt and response; the file
    itself receives the prompt and response as passed in, separated by a
    blank line, and is written as UTF-8 relative to the current working
    directory.

    Returns:
        The name of the file that was written.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename


def get_download_link(file):
    """Generate download link for file.

    Args:
        file: Path of the file to offer for download.

    Returns:
        An HTML anchor whose ``href`` is a base64 ``data:`` URI holding the
        file contents and whose ``download`` attribute is the file's base
        name.
    """
    from html import escape  # local import: only needed by this helper

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # Escape the name because the result is meant to be rendered as raw HTML.
    name = escape(os.path.basename(file), quote=True)
    # The encoded payload was previously computed but never used, so the
    # returned text was not a link at all.
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{name}">๐Ÿ“‚ Download {name}</a>'
    )


# 7.
Audio Processing def clean_for_speech(text: str) -> str: """Clean text for speech synthesis""" text = text.replace("\n", " ") text = text.replace("", " ") text = text.replace("#", "") text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text) text = re.sub(r"\s+", " ", text).strip() return text @st.cache_resource def speech_synthesis_html(result): """Create HTML for speech synthesis""" html_code = f""" """ components.html(html_code, height=0) async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0): """Generate audio using Edge TTS (async)""" text = clean_for_speech(text) if not text.strip(): return None rate_str = f"{rate:+d}%" pitch_str = f"{pitch:+d}Hz" communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str) out_fn = generate_filename(text, text, "mp3") await communicate.save(out_fn) return out_fn def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0): """Wrapper for edge TTS generation (sync)""" return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch)) def play_and_download_audio(file_path): """Play and provide a download link for audio""" if file_path and os.path.exists(file_path): st.audio(file_path) dl_link = f'Download {os.path.basename(file_path)}' st.markdown(dl_link, unsafe_allow_html=True) def auto_play_audio(file_path): """ Reads MP3 file as base64, displays an