import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import defaultdict, deque
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts
# 1. Core Configuration & Setup
st.set_page_config(
    page_title="🚲BikeAI🏆 Research Assistant Pro",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "Research Assistant Pro with Voice Search"
    }
)
load_dotenv()

# 2. API Setup & Clients
openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ''))
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', st.secrets.get('ANTHROPIC_API_KEY', ''))
hf_key = os.getenv('HF_KEY', st.secrets.get('HF_KEY', ''))
openai_client = OpenAI(api_key=openai_api_key)
claude_client = anthropic.Anthropic(api_key=anthropic_key)
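# Keys are read from the environment first, then from Streamlit secrets. If both are empty
# the clients are still constructed with an empty key and only fail when the first request
# is made, so missing credentials surface as call-time errors in the chat/search handlers.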
# 3. Session State Management
if 'transcript_history' not in st.session_state:
    st.session_state['transcript_history'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
if 'openai_model' not in st.session_state:
    st.session_state['openai_model'] = "gpt-4-vision-preview"
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
if 'last_voice_input' not in st.session_state:
    st.session_state['last_voice_input'] = ""
if 'editing_file' not in st.session_state:
    st.session_state['editing_file'] = None
if 'current_audio' not in st.session_state:
    st.session_state['current_audio'] = None
if 'autoplay_audio' not in st.session_state:
    st.session_state['autoplay_audio'] = True
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False
if 'old_val' not in st.session_state:
    st.session_state['old_val'] = None
# 4. Style Definitions
st.markdown("""
    <style>
    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
    .stButton>button {
        margin-right: 0.5rem;
        background-color: #4CAF50;
        color: white;
        padding: 0.5rem 1rem;
        border-radius: 5px;
        border: none;
        transition: background-color 0.3s;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    .audio-player {
        margin: 1rem 0;
        padding: 1rem;
        border-radius: 10px;
        background: white;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .file-manager {
        padding: 1rem;
        background: white;
        border-radius: 10px;
        margin: 1rem 0;
    }
    </style>
""", unsafe_allow_html=True)
# Keyed by the lowercased file-manager category so the sidebar expanders pick up the right icon
FILE_EMOJIS = {
    "documents": "📝",
    "audio": "🎵",
    "video": "🎥",
    "images": "🖼️"
}
# 5. Voice Recognition Component
def create_voice_component():
    """Create auto-searching voice recognition component"""
    return components.html(
        """
        <div style="padding: 20px; border-radius: 10px; background: #f0f2f6;">
            <div id="status" style="margin-bottom: 10px; color: #666;">Starting voice recognition...</div>
            <div id="interim" style="color: #666; min-height: 24px;"></div>
            <div id="output" style="margin-top: 10px; padding: 10px; min-height: 100px;
                 background: white; border-radius: 5px; white-space: pre-wrap;"></div>
            <script>
                if ('webkitSpeechRecognition' in window) {
                    const recognition = new webkitSpeechRecognition();
                    recognition.continuous = true;
                    recognition.interimResults = true;
                    const status = document.getElementById('status');
                    const interim = document.getElementById('interim');
                    const output = document.getElementById('output');
                    let fullTranscript = '';
                    let lastPauseTime = Date.now();
                    let pauseThreshold = 1500; // Time in ms to wait before triggering search

                    // Auto-start on load
                    window.addEventListener('load', () => {
                        setTimeout(() => {
                            try {
                                recognition.start();
                                status.textContent = 'Listening...';
                            } catch (e) {
                                console.error('Start error:', e);
                                status.textContent = 'Error starting recognition';
                            }
                        }, 1000);
                    });

                    recognition.onresult = (event) => {
                        let interimTranscript = '';
                        let finalTranscript = '';
                        for (let i = event.resultIndex; i < event.results.length; i++) {
                            const transcript = event.results[i][0].transcript;
                            if (event.results[i].isFinal) {
                                finalTranscript += transcript + ' ';
                                lastPauseTime = Date.now();
                            } else {
                                interimTranscript += transcript;
                            }
                        }
                        if (finalTranscript) {
                            fullTranscript += finalTranscript;
                            interim.textContent = '';
                            output.textContent = fullTranscript;
                            // Send to Streamlit for processing
                            window.parent.postMessage({
                                type: 'streamlit:setComponentValue',
                                value: fullTranscript,
                                dataType: 'json',
                            }, '*');
                        } else if (interimTranscript) {
                            interim.textContent = '... ' + interimTranscript;
                        }
                        output.scrollTop = output.scrollHeight;
                    };

                    // Check for pauses and trigger search
                    setInterval(() => {
                        if (fullTranscript && Date.now() - lastPauseTime > pauseThreshold) {
                            if (output.dataset.lastProcessed !== fullTranscript) {
                                output.dataset.lastProcessed = fullTranscript;
                                window.parent.postMessage({
                                    type: 'streamlit:setComponentValue',
                                    value: {
                                        text: fullTranscript,
                                        trigger: 'pause'
                                    },
                                    dataType: 'json',
                                }, '*');
                            }
                        }
                    }, 500);

                    recognition.onend = () => {
                        try {
                            recognition.start();
                            status.textContent = 'Listening...';
                        } catch (e) {
                            console.error('Restart error:', e);
                            status.textContent = 'Recognition stopped. Refresh to restart.';
                        }
                    };

                    recognition.onerror = (event) => {
                        console.error('Recognition error:', event.error);
                        status.textContent = 'Error: ' + event.error;
                    };
                } else {
                    document.getElementById('status').textContent = 'Speech recognition not supported in this browser';
                }
            </script>
        </div>
        """,
        height=200
    )
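# Note (assumption about the Streamlit component API): st.components.v1.html renders static
# HTML and does not itself return a component value, so the streamlit:setComponentValue
# messages posted above only reach Python if this snippet is registered as a bidirectional
# custom component via components.declare_component. The callers below already treat a None
# return value as "no input yet", so the UI degrades gracefully either way.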
# Available English voices
ENGLISH_VOICES = [
    "en-US-AriaNeural",      # Female, conversational
    "en-US-JennyNeural",     # Female, customer service
    "en-US-GuyNeural",       # Male, newscast
    "en-US-RogerNeural",     # Male, calm
    "en-GB-SoniaNeural",     # British female
    "en-GB-RyanNeural",      # British male
    "en-AU-NatashaNeural",   # Australian female
    "en-AU-WilliamNeural",   # Australian male
    "en-CA-ClaraNeural",     # Canadian female
    "en-CA-LiamNeural",      # Canadian male
    "en-IE-EmilyNeural",     # Irish female
    "en-IE-ConnorNeural",    # Irish male
    "en-IN-NeerjaNeural",    # Indian female
    "en-IN-PrabhatNeural",   # Indian male
]
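# The list above is a hand-picked subset. A hedged sketch for enumerating voices at runtime,
# assuming the edge-tts package's list_voices() coroutine and its "ShortName" field:
#   voices = asyncio.run(edge_tts.list_voices())
#   english = [v["ShortName"] for v in voices if v["ShortName"].startswith("en-")]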
def render_search_interface():
    """Render main search interface with auto-search voice component"""
    st.header("🔍 Voice Search")

    # Voice settings
    col1, col2 = st.columns([2, 1])
    with col1:
        selected_voice = st.selectbox(
            "Select Voice",
            ENGLISH_VOICES,
            index=0,
            help="Choose the voice for audio responses"
        )
    with col2:
        auto_search = st.checkbox("Auto-Search on Pause", value=True)

    # Voice component
    voice_result = create_voice_component()

    # Handle voice input
    if voice_result and isinstance(voice_result, (str, dict)):
        # Extract text and trigger info
        if isinstance(voice_result, dict):
            current_text = voice_result.get('text', '')
            trigger = voice_result.get('trigger')
        else:
            current_text = voice_result
            trigger = None

        # Process on pause trigger if enabled
        if auto_search and trigger == 'pause' and current_text:
            if current_text != st.session_state.get('last_processed_text', ''):
                st.session_state.last_processed_text = current_text

                # Show the detected text
                st.info(f"🎤 Detected: {current_text}")

                # Perform search
                try:
                    with st.spinner("Searching and generating audio response..."):
                        response, audio_file = asyncio.run(
                            process_voice_search(
                                current_text,
                                voice=selected_voice
                            )
                        )
                    if response:
                        st.markdown(response)
                        if audio_file:
                            render_audio_result(audio_file, "Search Results")

                        # Save to history
                        st.session_state.transcript_history.append({
                            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                            'query': current_text,
                            'response': response,
                            'audio': audio_file
                        })
                except Exception as e:
                    st.error(f"Error processing search: {str(e)}")

    # Manual search option
    with st.expander("📝 Manual Search", expanded=False):
        query = st.text_input("Search Query:", value=st.session_state.get('last_processed_text', ''))
        if st.button("🔍 Search"):
            try:
                with st.spinner("Searching and generating audio..."):
                    response, audio_file = asyncio.run(
                        process_voice_search(
                            query,
                            voice=selected_voice
                        )
                    )
                if response:
                    st.markdown(response)
                    if audio_file:
                        render_audio_result(audio_file)
            except Exception as e:
                st.error(f"Error processing search: {str(e)}")
# 6. Audio Processing Functions
def get_autoplay_audio_html(audio_path, width="100%"):
    """Create HTML for autoplaying audio with controls"""
    try:
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
            audio_b64 = base64.b64encode(audio_bytes).decode()
            return f'''
                <audio controls autoplay style="width: {width};">
                    <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
                    Your browser does not support the audio element.
                </audio>
                <div style="margin-top: 5px;">
                    <a href="data:audio/mpeg;base64,{audio_b64}"
                       download="{os.path.basename(audio_path)}"
                       style="text-decoration: none;">
                        ⬇️ Download Audio
                    </a>
                </div>
            '''
    except Exception as e:
        return f"Error loading audio: {str(e)}"
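# Design note: embedding the MP3 as a base64 data URI keeps playback and download
# self-contained (no static file route needed), at the cost of inflating the rendered
# page by roughly 4/3 of the audio file size per player.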
def clean_for_speech(text: str) -> str:
    """Clean text for speech synthesis"""
    text = text.replace("\n", " ")
    text = text.replace("</s>", " ")
    text = text.replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
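# Example: clean_for_speech("# Results\nSee (https://arxiv.org/abs/1234.5678) for details")
# returns "Results See for details" — markdown headers and parenthesized URLs are dropped
# so the spoken output reads naturally.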
async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    """Generate audio using Edge TTS"""
    text = clean_for_speech(text)
    if not text.strip():
        return None
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"response_{timestamp}.mp3"
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await communicate.save(output_file)
    return output_file
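# generate_audio is a coroutine; the synchronous Streamlit handlers below run it via
# asyncio.run, e.g.:
#   mp3_path = asyncio.run(generate_audio("Hello world", voice="en-US-AriaNeural"))
# rate and pitch take edge-tts offset strings such as "+10%" and "-2Hz".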
def render_audio_result(audio_file, title="Generated Audio"):
    """Render audio result with autoplay in Streamlit"""
    if audio_file and os.path.exists(audio_file):
        st.markdown(f"### {title}")
        st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
# 7. File Operations
def generate_filename(text, response="", file_type="md"):
    """Generate intelligent filename"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_text = re.sub(r'[^\w\s-]', '', text[:50])
    return f"{timestamp}_{safe_text}.{file_type}"

def create_file(text, response, file_type="md"):
    """Create file with content"""
    filename = generate_filename(text, response, file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"{text}\n\n{response}")
    return filename
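# Example: generate_filename("What is retrieval augmented generation?") produces something like
# "20240101_120000_What is retrieval augmented generation.md" — a timestamp prefix plus the
# first 50 characters of the query with punctuation stripped.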
def get_download_link(file_path):
    """Generate download link for file"""
    with open(file_path, "rb") as file:
        contents = file.read()
    b64 = base64.b64encode(contents).decode()
    file_name = os.path.basename(file_path)
    return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">⬇️ Download {file_name}</a>'
# 8. Search and Process Functions
def perform_arxiv_search(query, response_type="summary"):
    """Enhanced Arxiv search with voice response"""
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # Get search results and AI interpretation
    refs = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]
    summary = client.predict(
        query,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )

    # Format response
    response = f"### 🔎 Search Results for: {query}\n\n{summary}\n\n### 📚 References\n\n{refs}"
    return response, refs
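# The Gradio client above calls the hosted "Arxiv-Paper-Search-And-QA-RAG-Pattern" Space over
# HTTP, so both predict() calls can raise on network or quota errors; the UI therefore wraps
# process_voice_search() in try/except blocks rather than handling failures here.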
async def process_voice_search(query, voice="en-US-AriaNeural"):
    """Process voice search with automatic audio"""
    response, refs = perform_arxiv_search(query)

    # Generate audio from response using the requested voice
    audio_file = await generate_audio(response, voice=voice)

    # Update state
    st.session_state.current_audio = audio_file
    return response, audio_file
def process_with_gpt(text):
    """Process text with GPT-4"""
    if not text:
        return
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        response = openai_client.chat.completions.create(
            model=st.session_state.openai_model,
            messages=st.session_state.messages,
            stream=False
        )
        answer = response.choices[0].message.content
        st.write(f"GPT-4: {answer}")

        # Generate audio response
        audio_file = asyncio.run(generate_audio(answer))
        if audio_file:
            render_audio_result(audio_file, "GPT-4 Response")

        # Save response
        create_file(text, answer, "md")
        st.session_state.messages.append({"role": "assistant", "content": answer})
    return answer
def process_with_claude(text):
    """Process text with Claude"""
    if not text:
        return
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        response = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": text}]
        )
        answer = response.content[0].text
        st.write(f"Claude-3: {answer}")

        # Generate audio response
        audio_file = asyncio.run(generate_audio(answer))
        if audio_file:
            render_audio_result(audio_file, "Claude Response")

        # Save response
        create_file(text, answer, "md")
        st.session_state.chat_history.append({"user": text, "claude": answer})
    return answer
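# Unlike process_with_gpt, which sends the full st.session_state.messages history, the Claude
# call above is single-turn: only the current prompt is sent, and the exchange is kept in the
# separate chat_history list used by the history tab.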
# 9. UI Components
# (render_search_interface is defined in section 5 above.)
def display_file_manager():
    """Display file manager with media preview"""
    st.sidebar.title("📁 File Manager")
    files = {
        'Documents': glob.glob("*.md"),
        'Audio': glob.glob("*.mp3"),
        'Video': glob.glob("*.mp4"),
        'Images': glob.glob("*.png") + glob.glob("*.jpg")
    }

    # Top actions
    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for category in files.values():
                for file in category:
                    os.remove(file)
            st.rerun()
    with col2:
        if st.button("⬇️ Download All"):
            zip_name = f"archive_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
            with zipfile.ZipFile(zip_name, 'w') as zipf:
                for category in files.values():
                    for file in category:
                        zipf.write(file)
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)

    # Display files by category
    for category, category_files in files.items():
        if category_files:
            with st.sidebar.expander(f"{FILE_EMOJIS.get(category.lower(), '📄')} {category} ({len(category_files)})", expanded=True):
                for file in sorted(category_files, key=os.path.getmtime, reverse=True):
                    col1, col2, col3 = st.columns([3, 1, 1])
                    with col1:
                        st.markdown(f"**{os.path.basename(file)}**")
                    with col2:
                        st.markdown(get_download_link(file), unsafe_allow_html=True)
                    with col3:
                        if st.button("🗑", key=f"del_{file}"):
                            os.remove(file)
                            st.rerun()
def display_media_gallery():
    """Display media files in gallery format"""
    media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
    with media_tabs[0]:
        audio_files = glob.glob("*.mp3")
        if audio_files:
            for audio_file in audio_files:
                st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
        else:
            st.write("No audio files found")
    with media_tabs[1]:
        video_files = glob.glob("*.mp4")
        if video_files:
            cols = st.columns(2)
            for idx, video_file in enumerate(video_files):
                with cols[idx % 2]:
                    st.video(video_file)
        else:
            st.write("No video files found")
    with media_tabs[2]:
        image_files = glob.glob("*.png") + glob.glob("*.jpg")
        if image_files:
            cols = st.columns(3)
            for idx, image_file in enumerate(image_files):
                with cols[idx % 3]:
                    st.image(Image.open(image_file), use_column_width=True)
                    if st.button(f"Analyze {os.path.basename(image_file)}", key=f"analyze_{image_file}"):
                        with st.spinner("Analyzing image..."):
                            analysis = process_with_gpt(f"Analyze this image: {image_file}")
                            st.markdown(analysis)
        else:
            st.write("No images found")
def display_search_history():
    """Display search history with audio playback"""
    st.header("Search History")
    history_tabs = st.tabs(["🔍 Voice Searches", "💬 Chat History"])
    with history_tabs[0]:
        for entry in reversed(st.session_state.transcript_history):
            with st.expander(f"🔍 {entry['timestamp']} - {entry['query'][:50]}...", expanded=False):
                st.markdown(entry['response'])
                if entry.get('audio'):
                    render_audio_result(entry['audio'], "Recorded Response")
    with history_tabs[1]:
        chat_tabs = st.tabs(["Claude History", "GPT-4 History"])
        with chat_tabs[0]:
            for chat in st.session_state.chat_history:
                st.markdown(f"**You:** {chat['user']}")
                st.markdown(f"**Claude:** {chat['claude']}")
                st.markdown("---")
        with chat_tabs[1]:
            for msg in st.session_state.messages:
                with st.chat_message(msg["role"]):
                    st.markdown(msg["content"])
# Main Application
def main():
    st.title("🔬 Research Assistant Pro")

    # Initialize autorun setting
    if 'autorun' not in st.session_state:
        st.session_state.autorun = True

    # Settings sidebar
    with st.sidebar:
        st.title("⚙️ Settings")
        st.session_state.autorun = st.checkbox("Enable Autorun", value=True)

        st.subheader("Voice Settings")
        voice_options = [
            "en-US-AriaNeural",
            "en-US-GuyNeural",
            "en-GB-SoniaNeural",
            "en-AU-NatashaNeural"
        ]
        selected_voice = st.selectbox("Select Voice", voice_options)

        st.subheader("Audio Settings")
        rate = st.slider("Speech Rate", -50, 50, 0, 5)
        pitch = st.slider("Pitch", -50, 50, 0, 5)
        st.session_state.autoplay_audio = st.checkbox(
            "Autoplay Audio",
            value=True,
            help="Automatically play audio when generated"
        )
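    # Note: the sidebar voice/rate/pitch controls are collected here but generate_audio() is
    # currently called with its defaults elsewhere; wiring them through would mean passing
    # voice=selected_voice and formatting the sliders as edge-tts offsets, e.g. f"{rate:+d}%"
    # and f"{pitch:+d}Hz".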
    # Main content tabs
    tabs = st.tabs(["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Advanced"])
    with tabs[0]:
        render_search_interface()
    with tabs[1]:
        display_search_history()
    with tabs[2]:
        display_media_gallery()
    with tabs[3]:
        st.header("Advanced Settings")
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Model Settings")
            st.selectbox(
                "Default Search Model",
                ["Claude-3", "GPT-4", "Mixtral-8x7B"],
                key="default_model"
            )
            st.number_input(
                "Max Results",
                min_value=5,
                max_value=50,
                value=20,
                key="max_results"
            )
        with col2:
            st.subheader("Audio Settings")
            st.slider(
                "Max Audio Duration (seconds)",
                min_value=30,
                max_value=300,
                value=120,
                step=30,
                key="max_audio_duration"
            )
            st.checkbox(
                "High Quality Audio",
                value=True,
                key="high_quality_audio"
            )

    # File manager sidebar
    display_file_manager()

    # Handle rerun if needed
    if st.session_state.get('should_rerun', False):
        st.session_state.should_rerun = False
        st.rerun()

if __name__ == "__main__":
    main()