diff --git "a/backup22-fulltoobigimho.app.py" "b/backup22-fulltoobigimho.app.py" --- "a/backup22-fulltoobigimho.app.py" +++ "b/backup22-fulltoobigimho.app.py" @@ -1,6 +1,6 @@ import streamlit as st import anthropic -import openai +import openai import base64 import cv2 import glob @@ -34,13 +34,13 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx import asyncio import edge_tts from streamlit_marquee import streamlit_marquee -from concurrent.futures import ThreadPoolExecutor -from functools import partial -from typing import Dict, List, Optional, Tuple, Union +from typing import Tuple, Optional +import pandas as pd # ───────────────────────────────────────────────────────── # 1. CORE CONFIGURATION & SETUP # ───────────────────────────────────────────────────────── + st.set_page_config( page_title="🚲TalkingAIResearcher🏆", page_icon="🚲🏆", @@ -52,10 +52,9 @@ st.set_page_config( 'About': "🚲TalkingAIResearcher🏆" } ) - load_dotenv() -# Available English voices for Edge TTS +# ▶ Available English voices for Edge TTS EDGE_TTS_VOICES = [ "en-US-AriaNeural", "en-US-GuyNeural", @@ -68,51 +67,63 @@ EDGE_TTS_VOICES = [ "en-CA-LiamNeural" ] -# Session state initialization with default values -DEFAULT_SESSION_STATE = { - 'marquee_settings': { +# ▶ Initialize Session State +if 'marquee_settings' not in st.session_state: + st.session_state['marquee_settings'] = { "background": "#1E1E1E", "color": "#FFFFFF", "font-size": "14px", "animationDuration": "20s", "width": "100%", "lineHeight": "35px" - }, - 'tts_voice': EDGE_TTS_VOICES[0], - 'audio_format': 'mp3', - 'transcript_history': [], - 'chat_history': [], - 'openai_model': "gpt-4o-2024-05-13", - 'messages': [], - 'last_voice_input': "", - 'editing_file': None, - 'edit_new_name': "", - 'edit_new_content': "", - 'viewing_prefix': None, - 'should_rerun': False, - 'old_val': None, - 'last_query': "", - 'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant", - 'enable_audio': False, - 'enable_download': False, - 'enable_claude': True, - 'audio_cache': {}, - 'paper_cache': {}, - 'download_link_cache': {}, - 'performance_metrics': defaultdict(list), - 'operation_timings': defaultdict(float) -} - -# Initialize session state -for key, value in DEFAULT_SESSION_STATE.items(): - if key not in st.session_state: - st.session_state[key] = value - -# API Keys and Configuration + } +if 'tts_voice' not in st.session_state: + st.session_state['tts_voice'] = EDGE_TTS_VOICES[0] +if 'audio_format' not in st.session_state: + st.session_state['audio_format'] = 'mp3' +if 'transcript_history' not in st.session_state: + st.session_state['transcript_history'] = [] +if 'chat_history' not in st.session_state: + st.session_state['chat_history'] = [] +if 'openai_model' not in st.session_state: + st.session_state['openai_model'] = "gpt-4o-2024-05-13" +if 'messages' not in st.session_state: + st.session_state['messages'] = [] +if 'last_voice_input' not in st.session_state: + st.session_state['last_voice_input'] = "" +if 'editing_file' not in st.session_state: + st.session_state['editing_file'] = None +if 'edit_new_name' not in st.session_state: + st.session_state['edit_new_name'] = "" +if 'edit_new_content' not in st.session_state: + st.session_state['edit_new_content'] = "" +if 'viewing_prefix' not in st.session_state: + st.session_state['viewing_prefix'] = None +if 'should_rerun' not in st.session_state: + st.session_state['should_rerun'] = False +if 'old_val' not in st.session_state: + st.session_state['old_val'] = None +if 'last_query' not in 
st.session_state: + st.session_state['last_query'] = "" +if 'marquee_content' not in st.session_state: + st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant" + +# ▶ Additional keys for performance, caching, etc. +if 'audio_cache' not in st.session_state: + st.session_state['audio_cache'] = {} +if 'download_link_cache' not in st.session_state: + st.session_state['download_link_cache'] = {} +if 'operation_timings' not in st.session_state: + st.session_state['operation_timings'] = {} +if 'performance_metrics' not in st.session_state: + st.session_state['performance_metrics'] = defaultdict(list) +if 'enable_audio' not in st.session_state: + st.session_state['enable_audio'] = True # Turn TTS on/off + +# ▶ API Keys openai_api_key = os.getenv('OPENAI_API_KEY', "") anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "") -xai_key = os.getenv('xai', "") - +xai_key = os.getenv('xai',"") if 'OPENAI_API_KEY' in st.secrets: openai_api_key = st.secrets['OPENAI_API_KEY'] if 'ANTHROPIC_API_KEY' in st.secrets: @@ -123,15 +134,11 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR HF_KEY = os.getenv('HF_KEY') API_URL = os.getenv('API_URL') -# File type emojis for display +# ▶ Helper constants FILE_EMOJIS = { "md": "📝", "mp3": "🎵", - "wav": "🔊", - "pdf": "📄", - "txt": "📋", - "json": "📊", - "csv": "📈" + "wav": "🔊" } # ───────────────────────────────────────────────────────── @@ -139,7 +146,14 @@ FILE_EMOJIS = { # ───────────────────────────────────────────────────────── class PerformanceTimer: - """Context manager for timing operations with automatic logging.""" + """ + ⏱️ A context manager for timing operations with automatic logging. + Usage: + with PerformanceTimer("my_operation"): + # do something + The duration is stored into `st.session_state['operation_timings']` + and appended to the `performance_metrics` list. + """ def __init__(self, operation_name: str): self.operation_name = operation_name self.start_time = None @@ -155,7 +169,10 @@ class PerformanceTimer: st.session_state['performance_metrics'][self.operation_name].append(duration) def log_performance_metrics(): - """Display performance metrics in the sidebar.""" + """ + 📈 Display performance metrics in the sidebar, including a timing breakdown + and a small bar chart of average times. 
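+    Requires pandas (imported above as pd) for the averages bar chart.
+    Typical call site (see main() below):
+        log_performance_metrics()   # once per rerun, after the tabs have rendered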
+ """ st.sidebar.markdown("### ⏱️ Performance Metrics") metrics = st.session_state['operation_timings'] @@ -163,50 +180,180 @@ def log_performance_metrics(): total_time = sum(metrics.values()) st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s") - # Create timing breakdown + # Break down each operation time for operation, duration in metrics.items(): percentage = (duration / total_time) * 100 st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)") # Show timing history chart - if st.session_state['performance_metrics']: - history_data = [] - for op, times in st.session_state['performance_metrics'].items(): - if times: # Only show if we have timing data - avg_time = sum(times) / len(times) - history_data.append({"Operation": op, "Avg Time (s)": avg_time}) - - if history_data: # Create chart if we have data - st.sidebar.markdown("### 📈 Timing History") - chart_data = pd.DataFrame(history_data) - st.sidebar.bar_chart(chart_data.set_index("Operation")) + history_data = [] + for op, times in st.session_state['performance_metrics'].items(): + if times: # Only if we have data + avg_time = sum(times) / len(times) + history_data.append({"Operation": op, "Avg Time (s)": avg_time}) + + if history_data: + st.sidebar.markdown("### 📊 Timing History (Avg)") + chart_data = pd.DataFrame(history_data) + st.sidebar.bar_chart(chart_data.set_index("Operation")) + +# ───────────────────────────────────────────────────────── +# 3. HELPER FUNCTIONS (FILENAMES, LINKS, MARQUEE, ETC.) +# ───────────────────────────────────────────────────────── + +def get_central_time(): + """🌎 Get current time in US Central timezone.""" + central = pytz.timezone('US/Central') + return datetime.now(central) + +def format_timestamp_prefix(): + """📅 Generate a timestamp prefix: MM_dd_yy_hh_mm_AM/PM.""" + ct = get_central_time() + return ct.strftime("%m_%d_%y_%I_%M_%p") + +def initialize_marquee_settings(): + """🌈 Initialize marquee defaults if needed.""" + if 'marquee_settings' not in st.session_state: + st.session_state['marquee_settings'] = { + "background": "#1E1E1E", + "color": "#FFFFFF", + "font-size": "14px", + "animationDuration": "20s", + "width": "100%", + "lineHeight": "35px" + } + +def get_marquee_settings(): + """🔧 Retrieve marquee settings from session.""" + initialize_marquee_settings() + return st.session_state['marquee_settings'] + +def update_marquee_settings_ui(): + """🖌 Add color pickers & sliders for marquee config in the sidebar.""" + st.sidebar.markdown("### 🎯 Marquee Settings") + cols = st.sidebar.columns(2) + with cols[0]: + bg_color = st.color_picker("🎨 Background", + st.session_state['marquee_settings']["background"], + key="bg_color_picker") + text_color = st.color_picker("✍️ Text", + st.session_state['marquee_settings']["color"], + key="text_color_picker") + with cols[1]: + font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider") + duration = st.slider("⏱️ Speed (secs)", 1, 20, 20, key="duration_slider") + + st.session_state['marquee_settings'].update({ + "background": bg_color, + "color": text_color, + "font-size": f"{font_size}px", + "animationDuration": f"{duration}s" + }) + +def display_marquee(text, settings, key_suffix=""): + """ + 🎉 Show a marquee text with style from the marquee settings. + Automatically truncates text to ~280 chars to avoid overflow. + """ + truncated_text = text[:280] + "..." 
if len(text) > 280 else text + streamlit_marquee( + content=truncated_text, + **settings, + key=f"marquee_{key_suffix}" + ) + st.write("") + +def get_high_info_terms(text: str, top_n=10) -> list: + """ + 📌 Extract top_n frequent words & bigrams (excluding common stopwords). + Useful for generating short descriptive keywords from Q/A content. + """ + stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with']) + words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower()) + bi_grams = [' '.join(pair) for pair in zip(words, words[1:])] + combined = words + bi_grams + filtered = [term for term in combined if term not in stop_words and len(term.split()) <= 2] + counter = Counter(filtered) + return [term for term, freq in counter.most_common(top_n)] + +def clean_text_for_filename(text: str) -> str: + """ + 🏷️ Remove special chars & short unhelpful words from text for safer filenames. + Returns a lowercased, underscore-joined token string. + """ + text = text.lower() + text = re.sub(r'[^\w\s-]', '', text) + words = text.split() + stop_short = set(['the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library']) + filtered = [w for w in words if len(w) > 3 and w not in stop_short] + return '_'.join(filtered)[:200] + +def generate_filename(prompt, response, file_type="md", max_length=200): + """ + 📁 Create a shortened filename based on prompt+response content: + 1) Extract top info terms, + 2) Combine snippet from prompt+response, + 3) Remove duplicates, + 4) Truncate if needed. + """ + prefix = format_timestamp_prefix() + "_" + combined_text = (prompt + " " + response)[:200] + info_terms = get_high_info_terms(combined_text, top_n=5) + snippet = (prompt[:40] + " " + response[:40]).strip() + snippet_cleaned = clean_text_for_filename(snippet) + + # Remove duplicates + name_parts = info_terms + [snippet_cleaned] + seen = set() + unique_parts = [] + for part in name_parts: + if part not in seen: + seen.add(part) + unique_parts.append(part) + + full_name = '_'.join(unique_parts).strip('_') + leftover_chars = max_length - len(prefix) - len(file_type) - 1 + if len(full_name) > leftover_chars: + full_name = full_name[:leftover_chars] + + return f"{prefix}{full_name}.{file_type}" + +def create_file(prompt, response, file_type="md"): + """ + 📝 Create a text file from prompt + response with a sanitized filename. + Returns the created filename. + """ + filename = generate_filename(prompt.strip(), response.strip(), file_type) + with open(filename, 'w', encoding='utf-8') as f: + f.write(prompt + "\n\n" + response) + return filename # ───────────────────────────────────────────────────────── -# 3. OPTIMIZED AUDIO GENERATION +# 4. OPTIMIZED AUDIO GENERATION (ASYNC TTS + CACHING) # ───────────────────────────────────────────────────────── def clean_for_speech(text: str) -> str: - """Clean up text for TTS output with enhanced cleaning.""" + """ + 🔉 Clean up text for TTS output with enhanced cleaning. + Removes markdown, code blocks, links, etc. 
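+    Illustrative use (placeholder input, shown only to indicate behaviour):
+        clean_for_speech("## Intro *Attention* is [here](https://arxiv.org/abs/1706.03762)")
+    strips the header marker, emphasis, link syntax and bare URLs so the TTS
+    engine receives plain prose.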
+ """ with PerformanceTimer("text_cleaning"): - # Remove markdown formatting - text = re.sub(r'#+ ', '', text) # Remove headers - text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # Clean links - text = re.sub(r'[*_~`]', '', text) # Remove emphasis markers - + # Remove markdown headers + text = re.sub(r'#+ ', '', text) + # Remove link formats [text](url) + text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) + # Remove emphasis markers (*, _, ~, `) + text = re.sub(r'[*_~`]', '', text) # Remove code blocks text = re.sub(r'```[\s\S]*?```', '', text) text = re.sub(r'`[^`]*`', '', text) - - # Clean up whitespace - text = re.sub(r'\s+', ' ', text) - text = text.replace("\n", " ") + # Remove excess whitespace + text = re.sub(r'\s+', ' ', text).replace("\n", " ") + # Remove hidden S tokens text = text.replace("", " ") - # Remove URLs text = re.sub(r'https?://\S+', '', text) text = re.sub(r'\(https?://[^\)]+\)', '', text) - - # Final cleanup text = text.strip() return text @@ -217,37 +364,42 @@ async def async_edge_tts_generate( pitch: int = 0, file_format: str = "mp3" ) -> Tuple[Optional[str], float]: - """Asynchronous TTS generation with performance tracking and caching.""" + """ + 🎶 Asynchronous TTS generation with caching and performance tracking. + Returns (filename, generation_time). + """ with PerformanceTimer("tts_generation") as timer: - # Clean and validate text + # ▶ Clean & validate text text = clean_for_speech(text) if not text.strip(): return None, 0 - # Check cache + # ▶ Check cache (avoid regenerating the same TTS) cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}" if cache_key in st.session_state['audio_cache']: return st.session_state['audio_cache'][cache_key], 0 try: - # Generate audio + # ▶ Generate audio rate_str = f"{rate:+d}%" pitch_str = f"{pitch:+d}Hz" communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str) - # Generate unique filename + # ▶ Generate unique filename timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}" - # Save audio file + # ▶ Save audio file await communicate.save(filename) - # Cache result + # ▶ Store in cache st.session_state['audio_cache'][cache_key] = filename - return filename, time.time() - timer.start_time + # ▶ Return path + timing + return filename, time.time() - timer.start_time + except Exception as e: - st.error(f"Error generating audio: {str(e)}") + st.error(f"❌ Error generating audio: {str(e)}") return None, 0 async def async_save_qa_with_audio( @@ -255,17 +407,19 @@ async def async_save_qa_with_audio( answer: str, voice: Optional[str] = None ) -> Tuple[str, Optional[str], float, float]: - """Asynchronously save Q&A to markdown and generate audio with timing.""" + """ + 📝 Asynchronously save Q&A to markdown, then generate audio if enabled. + Returns (md_file, audio_file, md_time, audio_time). 
+ """ voice = voice or st.session_state['tts_voice'] with PerformanceTimer("qa_save") as timer: - # Save markdown + # ▶ Save Q/A as markdown md_start = time.time() - combined_text = f"# Question\n{question}\n\n# Answer\n{answer}" md_file = create_file(question, answer, "md") md_time = time.time() - md_start - # Generate audio if enabled + # ▶ Generate audio (if globally enabled) audio_file = None audio_time = 0 if st.session_state['enable_audio']: @@ -278,13 +432,11 @@ async def async_save_qa_with_audio( return md_file, audio_file, md_time, audio_time -def create_download_link_with_cache( - file_path: str, - file_type: str = "mp3" -) -> str: - """Create download link with caching and error handling.""" +def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str: + """ + ⬇️ Create a download link for a file with caching & error handling. + """ with PerformanceTimer("download_link_generation"): - # Check cache first cache_key = f"dl_{file_path}" if cache_key in st.session_state['download_link_cache']: return st.session_state['download_link_cache'][cache_key] @@ -292,9 +444,8 @@ def create_download_link_with_cache( try: with open(file_path, "rb") as f: b64 = base64.b64encode(f.read()).decode() - - # Generate appropriate link based on file type filename = os.path.basename(file_path) + if file_type == "mp3": link = f'🎵 Download {filename}' elif file_type == "wav": @@ -304,50 +455,290 @@ def create_download_link_with_cache( else: link = f'⬇️ Download {filename}' - # Cache and return st.session_state['download_link_cache'][cache_key] = link return link - + except Exception as e: - st.error(f"Error creating download link: {str(e)}") + st.error(f"❌ Error creating download link: {str(e)}") return "" +# ───────────────────────────────────────────────────────── +# 5. RESEARCH / ARXIV FUNCTIONS +# ───────────────────────────────────────────────────────── + +def parse_arxiv_refs(ref_text: str): + """ + 📜 Given a multi-line markdown with Arxiv references, + parse them into a list of dicts: {date, title, url, authors, summary}. + """ + if not ref_text: + return [] + results = [] + current_paper = {} + lines = ref_text.split('\n') + + for i, line in enumerate(lines): + if line.count('|') == 2: + # Found a new paper line + if current_paper: + results.append(current_paper) + if len(results) >= 20: + break + try: + header_parts = line.strip('* ').split('|') + date = header_parts[0].strip() + title = header_parts[1].strip() + url_match = re.search(r'(https://arxiv.org/\S+)', line) + url = url_match.group(1) if url_match else f"paper_{len(results)}" + + current_paper = { + 'date': date, + 'title': title, + 'url': url, + 'authors': '', + 'summary': '', + 'full_audio': None, + 'download_base64': '', + } + except Exception as e: + st.warning(f"⚠️ Error parsing paper header: {str(e)}") + current_paper = {} + continue + elif current_paper: + # If authors not set, fill it; otherwise, fill summary + if not current_paper['authors']: + current_paper['authors'] = line.strip('* ') + else: + if current_paper['summary']: + current_paper['summary'] += ' ' + line.strip() + else: + current_paper['summary'] = line.strip() + + if current_paper: + results.append(current_paper) + + return results[:20] + +def create_paper_links_md(papers): + """ + 🔗 Create a minimal .md content linking to each paper's Arxiv URL. + """ + lines = ["# Paper Links\n"] + for i, p in enumerate(papers, start=1): + lines.append(f"{i}. 
**{p['title']}** — [Arxiv]({p['url']})") + return "\n".join(lines) + +async def create_paper_audio_files(papers, input_question): + """ + 🎧 For each paper, generate TTS audio summary and store the path in `paper['full_audio']`. + Also creates a base64 download link in `paper['download_base64']`. + """ + for paper in papers: + try: + audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}" + audio_text = clean_for_speech(audio_text) + file_format = st.session_state['audio_format'] + audio_file, _ = await async_edge_tts_generate( + audio_text, + voice=st.session_state['tts_voice'], + file_format=file_format + ) + paper['full_audio'] = audio_file + + if audio_file: + # Convert to base64 link + ext = file_format + download_link = create_download_link_with_cache(audio_file, file_type=ext) + paper['download_base64'] = download_link + + except Exception as e: + st.warning(f"⚠️ Error processing paper {paper['title']}: {str(e)}") + paper['full_audio'] = None + paper['download_base64'] = '' + +def display_papers(papers, marquee_settings): + """ + 📑 Display paper info in the main area with marquee + expanders + audio. + """ + st.write("## 🔎 Research Papers") + for i, paper in enumerate(papers, start=1): + marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]} | 📝 {paper['summary'][:200]}" + display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}") + + with st.expander(f"{i}. 📄 {paper['title']}", expanded=True): + st.markdown(f"**{paper['date']} | {paper['title']}** — [Arxiv Link]({paper['url']})") + st.markdown(f"*Authors:* {paper['authors']}") + st.markdown(paper['summary']) + if paper.get('full_audio'): + st.write("📚 **Paper Audio**") + st.audio(paper['full_audio']) + if paper['download_base64']: + st.markdown(paper['download_base64'], unsafe_allow_html=True) + +def display_papers_in_sidebar(papers): + """ + 🔎 Mirrors the paper listing in the sidebar with expanders, audio, etc. + """ + st.sidebar.title("🎶 Papers & Audio") + for i, paper in enumerate(papers, start=1): + with st.sidebar.expander(f"{i}. {paper['title']}"): + st.markdown(f"**Arxiv:** [Link]({paper['url']})") + if paper['full_audio']: + st.audio(paper['full_audio']) + if paper['download_base64']: + st.markdown(paper['download_base64'], unsafe_allow_html=True) + st.markdown(f"**Authors:** {paper['authors']}") + if paper['summary']: + st.markdown(f"**Summary:** {paper['summary'][:300]}...") + +# ───────────────────────────────────────────────────────── +# 6. ZIP FUNCTION +# ───────────────────────────────────────────────────────── + +def create_zip_of_files(md_files, mp3_files, wav_files, input_question): + """ + 📦 Zip up all relevant files, generating a short name from high-info terms. + Returns the zip filename if created, else None. 
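+    Usage sketch (globs whatever currently sits in the working directory):
+        zip_name = create_zip_of_files(glob.glob("*.md"), glob.glob("*.mp3"),
+                                       glob.glob("*.wav"), "attention mechanisms")
+    The timestamp-plus-keywords name is truncated to 20 characters before ".zip"
+    is appended, so archive names stay short.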
+ """ + md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md'] + all_files = md_files + mp3_files + wav_files + if not all_files: + return None + + all_content = [] + for f in all_files: + if f.endswith('.md'): + with open(f, "r", encoding='utf-8') as file: + all_content.append(file.read()) + elif f.endswith('.mp3') or f.endswith('.wav'): + basename = os.path.splitext(os.path.basename(f))[0] + words = basename.replace('_', ' ') + all_content.append(words) + + all_content.append(input_question) + combined_content = " ".join(all_content) + info_terms = get_high_info_terms(combined_content, top_n=10) + + timestamp = format_timestamp_prefix() + name_text = '-'.join(term for term in info_terms[:5]) + short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip" + + with zipfile.ZipFile(short_zip_name, 'w') as z: + for f in all_files: + z.write(f) + return short_zip_name + +# ───────────────────────────────────────────────────────── +# 7. MAIN AI LOGIC: LOOKUP & TAB HANDLERS +# ───────────────────────────────────────────────────────── + +def perform_ai_lookup( + q, + vocal_summary=True, + extended_refs=False, + titles_summary=True, + full_audio=False +): + """ + 🔮 Main routine that uses Anthropic (Claude) + optional Gradio ArXiv RAG pipeline. + Currently demonstrates calling Anthropic and returning the text. + """ + with PerformanceTimer("ai_lookup"): + start = time.time() + + # ▶ Example call to Anthropic (Claude) + client = anthropic.Anthropic(api_key=anthropic_key) + user_input = q + + # Here we do a minimal prompt, just to show the call + # (You can enhance your prompt engineering as needed) + response = client.completions.create( + model="claude-2", + max_tokens_to_sample=512, + prompt=f"{anthropic.HUMAN_PROMPT} {user_input}{anthropic.AI_PROMPT}" + ) + + result_text = response.completion.strip() + + # ▶ Print and store + st.write("### Claude's reply 🧠:") + st.markdown(result_text) + + # ▶ We'll add to the chat history + st.session_state.chat_history.append({"user": q, "claude": result_text}) + + # ▶ Return final text + end = time.time() + st.write(f"**Elapsed:** {end - start:.2f}s") + + return result_text + +async def process_voice_input(text): + """ + 🎤 When user sends a voice query, we run the AI lookup + Q/A with audio. + Then we store the resulting markdown & audio in session or disk. + """ + if not text: + return + st.subheader("🔍 Search Results") + + # ▶ Call AI + result = perform_ai_lookup( + text, + vocal_summary=True, + extended_refs=False, + titles_summary=True, + full_audio=True + ) + + # ▶ Save Q&A as Markdown + audio (async) + md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(text, result) + st.subheader("📝 Generated Files") + st.write(f"**Markdown:** {md_file} (saved in {md_time:.2f}s)") + if audio_file: + st.write(f"**Audio:** {audio_file} (generated in {audio_time:.2f}s)") + st.audio(audio_file) + dl_link = create_download_link_with_cache(audio_file, file_type=st.session_state['audio_format']) + st.markdown(dl_link, unsafe_allow_html=True) -# --- def display_voice_tab(): - """Display voice input tab with TTS settings.""" + """ + 🎙️ Display the voice input tab with TTS settings and real-time usage. 
+ """ st.subheader("🎤 Voice Input") - # Voice Settings Section + # ▶ Voice Settings st.markdown("### 🎤 Voice Settings") - captionFemale='Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**' - captionMale='Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**' - st.sidebar.image('Group Picture - Voices.png', caption=captionFemale + ' - ' + captionMale) - st.sidebar.markdown(""" + caption_female = 'Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**' + caption_male = 'Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**' + + # Optionally, replace with your own local image or comment out + # st.sidebar.image('Group Picture - Voices.png', caption=caption_female + ' | ' + caption_male) + st.sidebar.markdown(""" # 🎙️ Voice Character Agent Selector 🎭 - 1. Female: - - 🌸 **Aria** – Female: 🌟 The voice of elegance and creativity, perfect for soothing storytelling or inspiring ideas. - - 🎶 **Jenny** – Female: 💖 Sweet and friendly, she’s the go-to for warm, conversational tones. - - 🌺 **Sonia** – Female: 💃 Bold and confident, ideal for commanding attention and delivering with flair. - - 🌌 **Natasha** – Female: ✨ Enigmatic and sophisticated, Natasha is great for a touch of mystery and charm. - - 🌷 **Clara** – Female: 🎀 Cheerful and gentle, perfect for nurturing, empathetic conversations. - --- - 2. Male: - - 🌟 **Guy** – Male: 🎩 Sophisticated and versatile, a natural fit for clear and authoritative delivery. - - 🛠️ **Ryan** – Male: 🤝 Down-to-earth and approachable, ideal for friendly and casual exchanges. - - 🎻 **William** – Male: 📚 Classic and refined, perfect for a scholarly or thoughtful tone. - - 🌟 **Liam** – Male: ⚡ Energetic and upbeat, great for dynamic, engaging interactions. - + *Female Voices*: + - 🌸 **Aria** – Elegant, creative storytelling + - 🎶 **Jenny** – Friendly, conversational + - 🌺 **Sonia** – Bold, confident + - 🌌 **Natasha** – Sophisticated, mysterious + - 🌷 **Clara** – Cheerful, empathetic + + *Male Voices*: + - 🌟 **Guy** – Authoritative, versatile + - 🛠️ **Ryan** – Approachable, casual + - 🎻 **William** – Classic, scholarly + - 🌟 **Liam** – Energetic, engaging """) + selected_voice = st.selectbox( - "Select TTS Voice:", + "👄 Select TTS Voice:", options=EDGE_TTS_VOICES, index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']) ) - # Audio Format Selection + # ▶ Audio Format st.markdown("### 🔊 Audio Format") selected_format = st.radio( "Choose Audio Format:", @@ -355,1515 +746,237 @@ def display_voice_tab(): index=0 ) - # Update session state if settings change + # ▶ Update session state if changed if selected_voice != st.session_state['tts_voice']: st.session_state['tts_voice'] = selected_voice - st.rerun() + st.experimental_rerun() if selected_format.lower() != st.session_state['audio_format']: st.session_state['audio_format'] = selected_format.lower() - st.rerun() + st.experimental_rerun() - # Text Input Area + # ▶ Text Input user_text = st.text_area("💬 Message:", height=100) user_text = user_text.strip().replace('\n', ' ') - # Send Button + # ▶ Send Button if st.button("📨 Send"): - process_voice_input(user_text) + # Run our process_voice_input as an async function + asyncio.run(process_voice_input(user_text)) - # Chat History + # ▶ Chat History st.subheader("📜 Chat History") for c in st.session_state.chat_history: st.write("**You:**", c["user"]) st.write("**Response:**", c["claude"]) -def display_arxiv_tab(): - """Display ArXiv search tab with options.""" - st.subheader("🔍 Query ArXiv") - q = st.text_input("🔍 Query:", 
key="arxiv_query") - - # Options Section - st.markdown("### 🎛 Options") - col1, col2 = st.columns(2) - - with col1: - vocal_summary = st.checkbox("🎙 Short Audio", value=True, - key="option_vocal_summary") - extended_refs = st.checkbox("📜 Long Refs", value=False, - key="option_extended_refs") - - with col2: - titles_summary = st.checkbox("🔖 Titles Only", value=True, - key="option_titles_summary") - full_audio = st.checkbox("📚 Full Audio", value=False, - key="option_full_audio") - - full_transcript = st.checkbox("🧾 Full Transcript", value=False, - key="option_full_transcript") - - if q and st.button("🔍 Run Search"): - st.session_state.last_query = q - result, timings = perform_ai_lookup( - q, - vocal_summary=vocal_summary, - extended_refs=extended_refs, - titles_summary=titles_summary, - full_audio=full_audio - ) - - if full_transcript: - create_file(q, result, "md") - -def display_media_tab(): - """Display media gallery tab with audio, images, and video.""" - st.header("📸 Media Gallery") - - # Create tabs for different media types - tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"]) - - # Audio Files Tab - with tabs[0]: - st.subheader("🎵 Audio Files") - audio_files = glob.glob("*.mp3") + glob.glob("*.wav") - - if audio_files: - for audio_file in audio_files: - with st.expander(os.path.basename(audio_file)): - st.audio(audio_file) - ext = os.path.splitext(audio_file)[1].replace('.', '') - dl_link = get_download_link(audio_file, file_type=ext) - st.markdown(dl_link, unsafe_allow_html=True) - else: - st.write("No audio files found.") - - # Images Tab - with tabs[1]: - st.subheader("🖼 Image Files") - image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg") - - if image_files: - cols = st.slider("Columns:", 1, 5, 3, key="cols_images") - image_cols = st.columns(cols) - - for i, img_file in enumerate(image_files): - with image_cols[i % cols]: - try: - img = Image.open(img_file) - st.image(img, use_column_width=True) - except Exception as e: - st.error(f"Error loading image {img_file}: {str(e)}") - else: - st.write("No images found.") - - # Video Tab - with tabs[2]: - st.subheader("🎥 Video Files") - video_files = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi") - - if video_files: - for video_file in video_files: - with st.expander(os.path.basename(video_file)): - st.video(video_file) - else: - st.write("No videos found.") +# ───────────────────────────────────────────────────────── +# FILE HISTORY SIDEBAR +# ───────────────────────────────────────────────────────── -def display_editor_tab(): - """Display text editor tab with file management.""" - st.subheader("📝 Text Editor") +def display_file_history_in_sidebar(): + """ + 📂 Shows a history of local .md, .mp3, .wav files (newest first), + with quick icons and optional download links. 
+ """ + st.sidebar.markdown("---") + st.sidebar.markdown("### 📂 File History") - # File Management Section - st.markdown("### 📂 File Management") - - # File Selection + # ▶ Gather all files md_files = glob.glob("*.md") - selected_file = st.selectbox( - "Select file to edit:", - ["New File"] + md_files, - key="file_selector" - ) + mp3_files = glob.glob("*.mp3") + wav_files = glob.glob("*.wav") + all_files = md_files + mp3_files + wav_files - # Edit Area - if selected_file == "New File": - new_filename = st.text_input("New filename (without extension):") - file_content = st.text_area("Content:", height=300) - - if st.button("💾 Save File"): - if new_filename: - try: - with open(f"{new_filename}.md", 'w', encoding='utf-8') as f: - f.write(file_content) - st.success(f"File {new_filename}.md saved successfully!") - st.session_state.should_rerun = True - except Exception as e: - st.error(f"Error saving file: {str(e)}") + if not all_files: + st.sidebar.write("No files found.") + return + + # ▶ Sort newest first + all_files = sorted(all_files, key=os.path.getmtime, reverse=True) + + for f in all_files: + fname = os.path.basename(f) + ext = os.path.splitext(fname)[1].lower().strip('.') + emoji = FILE_EMOJIS.get(ext, '📦') + time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S") + + with st.sidebar.expander(f"{emoji} {fname}"): + st.write(f"**Modified:** {time_str}") + if ext == "md": + with open(f, "r", encoding="utf-8") as file_in: + snippet = file_in.read(200).replace("\n", " ") + if len(snippet) == 200: + snippet += "..." + st.write(snippet) + dl_link = create_download_link_with_cache(f, file_type="md") + st.markdown(dl_link, unsafe_allow_html=True) + elif ext in ["mp3","wav"]: + st.audio(f) + dl_link = create_download_link_with_cache(f, file_type=ext) + st.markdown(dl_link, unsafe_allow_html=True) else: - st.warning("Please enter a filename.") - else: - try: - # Load existing file content - with open(selected_file, 'r', encoding='utf-8') as f: - file_content = f.read() - - # Edit existing file - edited_content = st.text_area( - "Edit content:", - value=file_content, - height=300 - ) - - col1, col2 = st.columns(2) - with col1: - if st.button("💾 Save Changes"): - try: - with open(selected_file, 'w', encoding='utf-8') as f: - f.write(edited_content) - st.success("Changes saved successfully!") - except Exception as e: - st.error(f"Error saving changes: {str(e)}") - - with col2: - if st.button("🗑 Delete File"): - try: - os.remove(selected_file) - st.success(f"File {selected_file} deleted successfully!") - st.session_state.should_rerun = True - except Exception as e: - st.error(f"Error deleting file: {str(e)}") - - except Exception as e: - st.error(f"Error loading file {selected_file}: {str(e)}") + dl_link = create_download_link_with_cache(f) + st.markdown(dl_link, unsafe_allow_html=True) -def display_settings_tab(): - """Display application settings tab.""" - st.subheader("⚙️ Settings") - - # General Settings - st.markdown("### 🔧 General Settings") - - # Theme Selection - theme = st.selectbox( - "Color Theme:", - ["Dark", "Light", "Custom"], - index=0 - ) - - if theme == "Custom": - st.color_picker("Primary Color:", "#1E1E1E") - st.color_picker("Secondary Color:", "#2D2D2D") - - # Performance Settings - st.markdown("### ⚡ Performance Settings") - - # Cache Settings - cache_size = st.slider( - "Maximum Cache Size (MB):", - 0, 1000, 100 - ) - - if st.button("Clear Cache"): - st.session_state['audio_cache'] = {} - st.session_state['paper_cache'] = {} - 
st.session_state['download_link_cache'] = {} - st.success("Cache cleared successfully!") - - # API Settings - st.markdown("### 🔑 API Settings") - - # Show/hide API keys - show_keys = st.checkbox("Show API Keys") - if show_keys: - st.text_input("OpenAI API Key:", value=openai_api_key) - st.text_input("Anthropic API Key:", value=anthropic_key) - - # Save Settings - if st.button("💾 Save Settings"): - st.success("Settings saved successfully!") - st.session_state.should_rerun = True +# ───────────────────────────────────────────────────────── +# MAIN APP +# ───────────────────────────────────────────────────────── +def main(): + # ▶ 1) Setup marquee UI in the sidebar + update_marquee_settings_ui() + marquee_settings = get_marquee_settings() + + # ▶ 2) Display the marquee welcome + display_marquee( + st.session_state['marquee_content'], + {**marquee_settings, "font-size": "28px", "lineHeight": "50px"}, + key_suffix="welcome" + ) + # ▶ 3) Main action tabs + tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"], + horizontal=True) -def get_download_link(file: str, file_type: str = "zip") -> str: - """ - Convert a file to base64 and return an HTML link for download. - Supports multiple file types with appropriate MIME types. - """ - try: - with open(file, "rb") as f: - b64 = base64.b64encode(f.read()).decode() - - # Get filename for display - filename = os.path.basename(file) - - # Define MIME types and emoji icons for different file types - mime_types = { - "zip": ("application/zip", "📂"), - "mp3": ("audio/mpeg", "🎵"), - "wav": ("audio/wav", "🔊"), - "md": ("text/markdown", "📝"), - "pdf": ("application/pdf", "📄"), - "txt": ("text/plain", "📋"), - "json": ("application/json", "📊"), - "csv": ("text/csv", "📈"), - "png": ("image/png", "🖼"), - "jpg": ("image/jpeg", "🖼"), - "jpeg": ("image/jpeg", "🖼") - } - - # Get MIME type and emoji for file - mime_type, emoji = mime_types.get( - file_type.lower(), - ("application/octet-stream", "⬇️") - ) - - # Create download link with appropriate MIME type - link = f'{emoji} Download {filename}' - - return link - - except FileNotFoundError: - return f"
❌ File not found: {file}
" - except Exception as e: - return f"❌ Error creating download link: {str(e)}
" + # ▶ 4) Show or hide custom component (optional example) + mycomponent = components.declare_component("mycomponent", path="mycomponent") + val = mycomponent(my_input_value="Hello from MyComponent") -def play_and_download_audio(file_path: str, file_type: str = "mp3"): - """ - Display audio player and download link for audio file. - Includes error handling and file validation. - """ - if not file_path: - st.warning("No audio file provided.") - return - - if not os.path.exists(file_path): - st.error(f"Audio file not found: {file_path}") - return - - try: - # Display audio player - st.audio(file_path) - - # Create and display download link - dl_link = get_download_link(file_path, file_type=file_type) - st.markdown(dl_link, unsafe_allow_html=True) - - except Exception as e: - st.error(f"Error playing audio: {str(e)}") - -def get_file_info(file_path: str) -> dict: - """ - Get detailed information about a file. - Returns dictionary with size, modification time, and other metadata. - """ - try: - stats = os.stat(file_path) - - # Get basic file information - info = { - 'name': os.path.basename(file_path), - 'path': file_path, - 'size': stats.st_size, - 'modified': datetime.fromtimestamp(stats.st_mtime), - 'created': datetime.fromtimestamp(stats.st_ctime), - 'type': os.path.splitext(file_path)[1].lower().strip('.'), - } - - # Add formatted size - if info['size'] < 1024: - info['size_fmt'] = f"{info['size']} B" - elif info['size'] < 1024 * 1024: - info['size_fmt'] = f"{info['size']/1024:.1f} KB" - else: - info['size_fmt'] = f"{info['size']/(1024*1024):.1f} MB" - - # Add formatted dates - info['modified_fmt'] = info['modified'].strftime("%Y-%m-%d %H:%M:%S") - info['created_fmt'] = info['created'].strftime("%Y-%m-%d %H:%M:%S") - - return info - - except Exception as e: - st.error(f"Error getting file info: {str(e)}") - return None - -def sanitize_filename(filename: str) -> str: - """ - Clean and sanitize a filename to ensure it's safe for filesystem. - Removes/replaces unsafe characters and enforces length limits. - """ - # Remove or replace unsafe characters - filename = re.sub(r'[<>:"/\\|?*]', '_', filename) - - # Remove leading/trailing spaces and dots - filename = filename.strip('. ') - - # Limit length (reserving space for extension) - max_length = 255 - name, ext = os.path.splitext(filename) - if len(filename) > max_length: - return name[:(max_length-len(ext))] + ext - - return filename - -def create_file_with_metadata(filename: str, content: str, metadata: dict = None): - """ - Create a file with optional metadata header. - Useful for storing additional information with files. - """ - try: - # Sanitize filename - safe_filename = sanitize_filename(filename) - - # Ensure directory exists - os.makedirs(os.path.dirname(safe_filename) or '.', exist_ok=True) - - # Prepare content with metadata - if metadata: - metadata_str = json.dumps(metadata, indent=2) - full_content = f"""--- -{metadata_str} ---- -{content}""" - else: - full_content = content - - # Write file - with open(safe_filename, 'w', encoding='utf-8') as f: - f.write(full_content) - - return safe_filename - - except Exception as e: - st.error(f"Error creating file: {str(e)}") - return None - -def read_file_with_metadata(filename: str) -> tuple: - """ - Read a file and extract any metadata header. - Returns tuple of (content, metadata). 
- """ - try: - with open(filename, 'r', encoding='utf-8') as f: - content = f.read() - - # Check for metadata section - if content.startswith('---\n'): - # Find end of metadata section - end_meta = content.find('\n---\n', 4) - if end_meta != -1: - try: - metadata = json.loads(content[4:end_meta]) - content = content[end_meta+5:] - return content, metadata - except json.JSONDecodeError: - pass - - return content, None - - except Exception as e: - st.error(f"Error reading file: {str(e)}") - return None, None - -def archive_files(file_paths: list, archive_name: str = None) -> str: - """ - Create a zip archive containing the specified files. - Returns path to created archive. - """ - try: - # Generate archive name if not provided - if not archive_name: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - archive_name = f"archive_{timestamp}.zip" - - # Create zip file - with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf: - for file_path in file_paths: - if os.path.exists(file_path): - zf.write(file_path, os.path.basename(file_path)) - - return archive_name - - except Exception as e: - st.error(f"Error creating archive: {str(e)}") - return None - -def list_files_by_type(directory: str = ".", - extensions: list = None, - recursive: bool = False) -> dict: - """ - List files in directory filtered by extension. - Returns dict grouping files by type. - """ - try: - if extensions is None: - extensions = ['md', 'mp3', 'wav', 'pdf', 'txt', 'json', 'csv'] - - files = {} - pattern = "**/*" if recursive else "*" - - for ext in extensions: - glob_pattern = f"{pattern}.{ext}" - matches = glob.glob(os.path.join(directory, glob_pattern), - recursive=recursive) - if matches: - files[ext] = matches - - return files - - except Exception as e: - st.error(f"Error listing files: {str(e)}") - return {} - - - - - - -def get_central_time() -> datetime: - """Get current time in US Central timezone.""" - central = pytz.timezone('US/Central') - return datetime.now(central) - -def format_timestamp_prefix() -> str: - """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM.""" - ct = get_central_time() - return ct.strftime("%m_%d_%y_%I_%M_%p") - -def get_formatted_time(dt: datetime = None, - timezone: str = 'US/Central', - include_timezone: bool = True, - include_seconds: bool = False) -> str: - """ - Format a datetime object with specified options. - If no datetime is provided, uses current time. - """ - if dt is None: - tz = pytz.timezone(timezone) - dt = datetime.now(tz) - elif dt.tzinfo is None: - tz = pytz.timezone(timezone) - dt = tz.localize(dt) - - format_string = "%Y-%m-%d %I:%M" - if include_seconds: - format_string += ":%S" - format_string += " %p" - if include_timezone: - format_string += " %Z" - - return dt.strftime(format_string) - -def parse_timestamp(timestamp_str: str, - timezone: str = 'US/Central') -> Optional[datetime]: - """ - Parse a timestamp string in various formats. - Returns timezone-aware datetime object. 
- """ - try: - # Try different format patterns - patterns = [ - "%m_%d_%y_%I_%M_%p", # Standard app format - "%Y-%m-%d %I:%M %p", # Common 12-hour format - "%Y-%m-%d %H:%M", # 24-hour format - "%m/%d/%y %I:%M %p", # US date format - "%d/%m/%y %I:%M %p" # European date format - ] - - dt = None - for pattern in patterns: - try: - dt = datetime.strptime(timestamp_str, pattern) - break - except ValueError: - continue - - if dt is None: - raise ValueError(f"Could not parse timestamp: {timestamp_str}") - - # Add timezone if not present - if dt.tzinfo is None: - tz = pytz.timezone(timezone) - dt = tz.localize(dt) - - return dt - - except Exception as e: - st.error(f"Error parsing timestamp: {str(e)}") - return None - -def get_time_ago(dt: datetime) -> str: - """ - Convert datetime to human-readable "time ago" format. - E.g., "2 hours ago", "3 days ago", etc. - """ - try: - now = datetime.now(dt.tzinfo) - diff = now - dt - - seconds = diff.total_seconds() - - if seconds < 60: - return "just now" - elif seconds < 3600: - minutes = int(seconds / 60) - return f"{minutes} minute{'s' if minutes != 1 else ''} ago" - elif seconds < 86400: - hours = int(seconds / 3600) - return f"{hours} hour{'s' if hours != 1 else ''} ago" - elif seconds < 604800: - days = int(seconds / 86400) - return f"{days} day{'s' if days != 1 else ''} ago" - elif seconds < 2592000: - weeks = int(seconds / 604800) - return f"{weeks} week{'s' if weeks != 1 else ''} ago" - elif seconds < 31536000: - months = int(seconds / 2592000) - return f"{months} month{'s' if months != 1 else ''} ago" + if val: + val_stripped = val.replace('\\n', ' ') + edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100) + run_option = st.selectbox("Model:", ["Arxiv", "Other (demo)"]) + col1, col2 = st.columns(2) + with col1: + autorun = st.checkbox("⚙ AutoRun", value=True) + with col2: + full_audio = st.checkbox("📚FullAudio", value=False) + + input_changed = (val != st.session_state.old_val) + + if autorun and input_changed: + st.session_state.old_val = val + st.session_state.last_query = edited_input + perform_ai_lookup(edited_input, + vocal_summary=True, + extended_refs=False, + titles_summary=True, + full_audio=full_audio) else: - years = int(seconds / 31536000) - return f"{years} year{'s' if years != 1 else ''} ago" - - except Exception as e: - st.error(f"Error calculating time ago: {str(e)}") - return "unknown time ago" - -def format_duration(seconds: float) -> str: - """ - Format a duration in seconds to human-readable string. - E.g., "2m 30s", "1h 15m", etc. 
- """ - try: - if seconds < 0: - return "invalid duration" - - # Handle special cases - if seconds < 1: - return f"{seconds * 1000:.0f}ms" - if seconds < 60: - return f"{seconds:.1f}s" - - # Calculate hours, minutes, seconds - hours = int(seconds // 3600) - minutes = int((seconds % 3600) // 60) - secs = seconds % 60 - - # Build duration string - parts = [] - if hours > 0: - parts.append(f"{hours}h") - if minutes > 0: - parts.append(f"{minutes}m") - if secs > 0 and hours == 0: # Only show seconds if less than an hour - parts.append(f"{secs:.1f}s") - - return " ".join(parts) - - except Exception as e: - st.error(f"Error formatting duration: {str(e)}") - return "unknown duration" - - - - - - -async def create_paper_audio_files(papers: List[Dict], input_question: str): - """Generate audio files for papers asynchronously with improved naming.""" - with PerformanceTimer("paper_audio_generation"): - tasks = [] - for paper in papers: - try: - # Prepare text for audio generation - audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}" - audio_text = clean_for_speech(audio_text) - - # Create sanitized title for filename - safe_title = paper['title'].lower() - safe_title = re.sub(r'[^\w\s-]', '', safe_title) # Remove special chars - safe_title = re.sub(r'\s+', '_', safe_title) # Replace spaces with underscores - safe_title = safe_title[:100] # Limit length - - # Generate timestamp - timestamp = format_timestamp_prefix() - - # Create filename with timestamp and title - filename = f"{timestamp}_{safe_title}.{st.session_state['audio_format']}" - - # Create task for audio generation - async def generate_audio(text, filename): - rate_str = "0%" - pitch_str = "0Hz" - communicate = edge_tts.Communicate(text, st.session_state['tts_voice']) - await communicate.save(filename) - return filename - - task = generate_audio(audio_text, filename) - tasks.append((paper, task, filename)) - - except Exception as e: - st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}") - continue - - # Process all audio generation tasks concurrently - for paper, task, filename in tasks: - try: - audio_file = await task - if audio_file: - paper['full_audio'] = audio_file - if st.session_state['enable_download']: - paper['download_base64'] = create_download_link_with_cache( - audio_file, - st.session_state['audio_format'] - ) - except Exception as e: - st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}") - paper['full_audio'] = None - paper['download_base64'] = '' - - - - - - - - - -# ───────────────────────────────────────────────────────── -# 4. 
PAPER PROCESSING & DISPLAY -# ───────────────────────────────────────────────────────── - -def parse_arxiv_refs(ref_text: str) -> List[Dict[str, str]]: - """Parse arxiv references with improved error handling.""" - if not ref_text: - return [] - - with PerformanceTimer("parse_refs"): - results = [] - current_paper = {} - lines = ref_text.split('\n') - - for i, line in enumerate(lines): - try: - if line.count('|') == 2: - # Found a new paper line - if current_paper: - results.append(current_paper) - if len(results) >= 20: # Limit to 20 papers - break - - # Parse header parts - header_parts = line.strip('* ').split('|') - date = header_parts[0].strip() - title = header_parts[1].strip() - url_match = re.search(r'(https://arxiv.org/\S+)', line) - url = url_match.group(1) if url_match else f"paper_{len(results)}" - - current_paper = { - 'date': date, - 'title': title, - 'url': url, - 'authors': '', - 'summary': '', - 'full_audio': None, - 'download_base64': '', - } - - elif current_paper: - # Add content to current paper - line = line.strip('* ') - if not current_paper['authors']: - current_paper['authors'] = line - else: - if current_paper['summary']: - current_paper['summary'] += ' ' + line - else: - current_paper['summary'] = line - - except Exception as e: - st.warning(f"Error parsing line {i}: {str(e)}") - continue - - # Add final paper if exists - if current_paper: - results.append(current_paper) - - return results[:20] # Ensure we don't exceed 20 papers - -async def create_paper_audio_files(papers: List[Dict], input_question: str): - """Generate audio files for papers asynchronously with progress tracking.""" - with PerformanceTimer("paper_audio_generation"): - tasks = [] - for paper in papers: - try: - # Prepare text for audio generation - audio_text = f"{paper['title']} by {paper['authors']}. 
{paper['summary']}" - audio_text = clean_for_speech(audio_text) - - # Create task for audio generation - task = async_edge_tts_generate( - audio_text, - voice=st.session_state['tts_voice'], - file_format=st.session_state['audio_format'] - ) - tasks.append((paper, task)) - - except Exception as e: - st.warning(f"Error preparing audio for paper {paper['title']}: {str(e)}") - continue - - # Process all audio generation tasks concurrently - for paper, task in tasks: - try: - audio_file, gen_time = await task - if audio_file: - paper['full_audio'] = audio_file - if st.session_state['enable_download']: - paper['download_base64'] = create_download_link_with_cache( - audio_file, - st.session_state['audio_format'] - ) - except Exception as e: - st.warning(f"Error generating audio for paper {paper['title']}: {str(e)}") - paper['full_audio'] = None - paper['download_base64'] = '' - - -def initialize_marquee_settings(): - """Initialize default marquee settings if not present in session state.""" - if 'marquee_settings' not in st.session_state: - st.session_state['marquee_settings'] = { - "background": "#1E1E1E", - "color": "#FFFFFF", - "font-size": "14px", - "animationDuration": "20s", - "width": "100%", - "lineHeight": "35px" - } - -def get_marquee_settings(): - """Get current marquee settings, initializing if needed.""" - initialize_marquee_settings() - return st.session_state['marquee_settings'] - -def update_marquee_settings_ui(): - """Add color pickers & sliders for marquee configuration in sidebar.""" - st.sidebar.markdown("### 🎯 Marquee Settings") - - # Create two columns for settings - cols = st.sidebar.columns(2) - - # Column 1: Color settings - with cols[0]: - # Background color picker - bg_color = st.color_picker( - "🎨 Background", - st.session_state['marquee_settings']["background"], - key="bg_color_picker" - ) - - # Text color picker - text_color = st.color_picker( - "✍️ Text Color", - st.session_state['marquee_settings']["color"], - key="text_color_picker" - ) - - # Column 2: Size and speed settings - with cols[1]: - # Font size slider - font_size = st.slider( - "📏 Font Size", - 10, 24, 14, - key="font_size_slider" - ) - - # Animation duration slider - duration = st.slider( - "⏱️ Animation Speed", - 1, 20, 20, - key="duration_slider" - ) - - # Update session state with new settings - st.session_state['marquee_settings'].update({ - "background": bg_color, - "color": text_color, - "font-size": f"{font_size}px", - "animationDuration": f"{duration}s" - }) - -def display_marquee(text: str, settings: dict, key_suffix: str = ""): - """Show marquee text with specified style settings.""" - # Truncate long text to prevent performance issues - truncated_text = text[:280] + "..." if len(text) > 280 else text - - # Display the marquee - streamlit_marquee( - content=truncated_text, - **settings, - key=f"marquee_{key_suffix}" - ) - - # Add spacing after marquee - st.write("") - -def create_paper_links_md(papers: list) -> str: - """Creates a minimal markdown file linking to each paper's arxiv URL.""" - lines = ["# Paper Links\n"] - for i, p in enumerate(papers, start=1): - lines.append(f"{i}. 
**{p['title']}** — [Arxiv]({p['url']})") - return "\n".join(lines) - -def apply_custom_styling(): - """Apply custom CSS styling to the app.""" + if st.button("▶ Run"): + st.session_state.old_val = val + st.session_state.last_query = edited_input + perform_ai_lookup(edited_input, + vocal_summary=True, + extended_refs=False, + titles_summary=True, + full_audio=full_audio) + + # ───────────────────────────────────────────────────────── + # TAB: ArXiv + # ───────────────────────────────────────────────────────── + if tab_main == "🔍 ArXiv": + st.subheader("🔍 Query ArXiv") + q = st.text_input("🔍 Query:", key="arxiv_query") + + st.markdown("### 🎛 Options") + vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary") + extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs") + titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary") + full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio") + full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript") + + if q and st.button("🔍Run"): + st.session_state.last_query = q + result = perform_ai_lookup(q, + vocal_summary=vocal_summary, + extended_refs=extended_refs, + titles_summary=titles_summary, + full_audio=full_audio) + if full_transcript: + create_file(q, result, "md") + + # ───────────────────────────────────────────────────────── + # TAB: Voice + # ───────────────────────────────────────────────────────── + elif tab_main == "🎤 Voice": + display_voice_tab() + + # ───────────────────────────────────────────────────────── + # TAB: Media + # ─────────────────��─────────────────────────────────────── + elif tab_main == "📸 Media": + st.header("📸 Media Gallery") + tabs = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"]) + + # ▶ AUDIO sub-tab + with tabs[0]: + st.subheader("🎵 Audio Files") + audio_files = glob.glob("*.mp3") + glob.glob("*.wav") + if audio_files: + for a in audio_files: + with st.expander(os.path.basename(a)): + st.audio(a) + ext = os.path.splitext(a)[1].replace('.', '') + dl_link = create_download_link_with_cache(a, file_type=ext) + st.markdown(dl_link, unsafe_allow_html=True) + else: + st.write("No audio files found.") + + # ▶ IMAGES sub-tab + with tabs[1]: + st.subheader("🖼 Image Files") + imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg") + if imgs: + c = st.slider("Cols", 1, 5, 3, key="cols_images") + cols = st.columns(c) + for i, f in enumerate(imgs): + with cols[i % c]: + st.image(Image.open(f), use_container_width=True) + else: + st.write("No images found.") + + # ▶ VIDEO sub-tab + with tabs[2]: + st.subheader("🎥 Video Files") + vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi") + if vids: + for v in vids: + with st.expander(os.path.basename(v)): + st.video(v) + else: + st.write("No videos found.") + + # ───────────────────────────────────────────────────────── + # TAB: Editor + # ───────────────────────────────────────────────────────── + elif tab_main == "📝 Editor": + st.write("### 📝 File Editor (Minimal Demo)") + st.write("Select or create a file to edit. 
More advanced features can be added as needed.") + + # ───────────────────────────────────────────────────────── + # SIDEBAR: FILE HISTORY + PERFORMANCE METRICS + # ───────────────────────────────────────────────────────── + display_file_history_in_sidebar() + log_performance_metrics() + + # ▶ Some light CSS styling st.markdown(""" - + """, unsafe_allow_html=True) -def display_performance_metrics(timings: dict): - """Display performance metrics with visualizations.""" - st.sidebar.markdown("### ⏱️ Performance Metrics") - - # Calculate total time - total_time = sum(timings.values()) - st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s") - - # Show breakdown of operations - st.sidebar.markdown("#### Operation Breakdown") - for operation, duration in timings.items(): - percentage = (duration / total_time) * 100 if total_time > 0 else 0 - st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)") - - # Create a progress bar for visual representation - st.sidebar.progress(percentage / 100) - - - - -def display_papers(papers: List[Dict], marquee_settings: Dict): - """Display paper information with enhanced visualization.""" - with PerformanceTimer("paper_display"): - st.write("## 📚 Research Papers") - - # Create tabs for different views - tab1, tab2 = st.tabs(["📋 List View", "📊 Grid View"]) - - with tab1: - for i, paper in enumerate(papers, start=1): - # Create marquee for paper title - marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:120]}" - display_marquee(marquee_text, marquee_settings, key_suffix=f"paper_{i}") - - # Paper details expander - with st.expander(f"{i}. 📄 {paper['title']}", expanded=True): - # Create PDF link - pdf_url = paper['url'].replace('/abs/', '/pdf/') - - # Display paper information - st.markdown(f""" - **Date:** {paper['date']} - **Title:** {paper['title']} - **Links:** 📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url}) - """) - st.markdown(f"**Authors:** {paper['authors']}") - st.markdown(f"**Summary:** {paper['summary']}") - - # Audio player and download if available - if paper.get('full_audio'): - st.write("🎧 Paper Audio Summary") - st.audio(paper['full_audio']) - if paper['download_base64']: - st.markdown(paper['download_base64'], unsafe_allow_html=True) - - with tab2: - # Create a grid layout of papers - cols = st.columns(3) - for i, paper in enumerate(papers): - with cols[i % 3]: - st.markdown(f""" - ### 📄 {paper['title'][:50]}... 
- **Date:** {paper['date']} - [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')}) - """) - if paper.get('full_audio'): - st.audio(paper['full_audio']) - -def display_papers_in_sidebar(papers: List[Dict]): - """Display paper listing in sidebar with lazy loading.""" - with PerformanceTimer("sidebar_display"): - st.sidebar.title("📚 Papers Overview") - - # Add filter options - filter_date = st.sidebar.date_input("Filter by date:", None) - search_term = st.sidebar.text_input("Search papers:", "") - - # Filter papers based on criteria - filtered_papers = papers - if filter_date: - filtered_papers = [p for p in filtered_papers - if filter_date.strftime("%Y-%m-%d") in p['date']] - if search_term: - search_lower = search_term.lower() - filtered_papers = [p for p in filtered_papers - if search_lower in p['title'].lower() - or search_lower in p['authors'].lower()] - - # Display filtered papers - for i, paper in enumerate(filtered_papers, start=1): - paper_key = f"paper_{paper['url']}" - if paper_key not in st.session_state: - st.session_state[paper_key] = False - - with st.sidebar.expander(f"{i}. {paper['title'][:50]}...", expanded=False): - # Paper metadata - st.markdown(f"**Date:** {paper['date']}") - - # Links - pdf_url = paper['url'].replace('/abs/', '/pdf/') - st.markdown(f"📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url})") - - # Preview of authors and summary - st.markdown(f"**Authors:** {paper['authors'][:100]}...") - if paper['summary']: - st.markdown(f"**Summary:** {paper['summary'][:200]}...") - - # Audio controls - if paper['full_audio']: - if st.button("🎵 Load Audio", key=f"btn_{paper_key}"): - st.session_state[paper_key] = True - - if st.session_state[paper_key]: - st.audio(paper['full_audio']) - if paper['download_base64']: - st.markdown(paper['download_base64'], unsafe_allow_html=True) + # ▶ Rerun if needed + if st.session_state.should_rerun: + st.session_state.should_rerun = False + st.experimental_rerun() # ───────────────────────────────────────────────────────── -# 5. FILE MANAGEMENT & HISTORY +# 8. 
RUN APP # ───────────────────────────────────────────────────────── -def create_file(prompt: str, response: str, file_type: str = "md") -> str: - """Create a file with proper naming and error handling.""" - with PerformanceTimer("file_creation"): - try: - # Generate filename - filename = generate_filename(prompt.strip(), response.strip(), file_type) - - # Ensure directory exists - os.makedirs("generated_files", exist_ok=True) - filepath = os.path.join("generated_files", filename) - - # Write content - with open(filepath, 'w', encoding='utf-8') as f: - if file_type == "md": - f.write(f"# Query\n{prompt}\n\n# Response\n{response}") - else: - f.write(f"{prompt}\n\n{response}") - - return filepath - - except Exception as e: - st.error(f"Error creating file: {str(e)}") - return "" - -def get_high_info_terms(text: str, top_n: int = 10) -> List[str]: - """Extract most informative terms from text.""" - # Common English stop words to filter out - stop_words = set([ - 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', - 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over', - 'after', 'the', 'this', 'that', 'these', 'those', 'what', 'which' - ]) - - # Extract words and bi-grams - words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower()) - bi_grams = [' '.join(pair) for pair in zip(words, words[1:])] - - # Combine and filter terms - combined = words + bi_grams - filtered = [term for term in combined - if term not in stop_words - and len(term.split()) <= 2 - and len(term) > 3] - - # Count and return top terms - counter = Counter(filtered) - return [term for term, freq in counter.most_common(top_n)] - -def clean_text_for_filename(text: str) -> str: - """Clean text for use in filenames.""" - # Remove special characters - text = text.lower() - text = re.sub(r'[^\w\s-]', '', text) - - # Remove common unhelpful words - stop_words = set([ - 'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which', - 'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai', - 'library', 'function', 'method', 'class', 'object', 'variable' - ]) - - words = text.split() - filtered = [w for w in words if len(w) > 3 and w not in stop_words] - - return '_'.join(filtered)[:200] - -def generate_filename(prompt: str, response: str, file_type: str = "md", - max_length: int = 200) -> str: - """Generate descriptive filename from content.""" - # Get timestamp prefix - prefix = format_timestamp_prefix() + "_" - - # Extract informative terms - combined_text = (prompt + " " + response)[:500] - info_terms = get_high_info_terms(combined_text, top_n=5) - - # Get content snippet - snippet = (prompt[:40] + " " + response[:40]).strip() - snippet_cleaned = clean_text_for_filename(snippet) - - # Combine and deduplicate parts - name_parts = info_terms + [snippet_cleaned] - seen = set() - unique_parts = [] - for part in name_parts: - if part not in seen: - seen.add(part) - unique_parts.append(part) - - # Create final filename - full_name = '_'.join(unique_parts).strip('_') - leftover_chars = max_length - len(prefix) - len(file_type) - 1 - if len(full_name) > leftover_chars: - full_name = full_name[:leftover_chars] - - return f"{prefix}{full_name}.{file_type}" - -def create_zip_of_files(md_files: List[str], mp3_files: List[str], - wav_files: List[str], input_question: str) -> Optional[str]: - """Create zip archive of files with optimization.""" - with PerformanceTimer("zip_creation"): - # Filter out readme and empty files - md_files = [f for f in md_files - if os.path.basename(f).lower() != 'readme.md' - and os.path.getsize(f) 
> 0] - - all_files = md_files + mp3_files + wav_files - if not all_files: - return None - - try: - # Generate zip name - all_content = [] - for f in all_files: - if f.endswith('.md'): - with open(f, 'r', encoding='utf-8') as file: - all_content.append(file.read()) - elif f.endswith(('.mp3', '.wav')): - basename = os.path.splitext(os.path.basename(f))[0] - all_content.append(basename.replace('_', ' ')) - - all_content.append(input_question) - combined_content = " ".join(all_content) - info_terms = get_high_info_terms(combined_content, top_n=10) - - timestamp = format_timestamp_prefix() - name_text = '-'.join(term for term in info_terms[:5]) - zip_name = f"archive_{timestamp}_{name_text[:50]}.zip" - - # Create zip file - with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as z: - for f in all_files: - z.write(f, os.path.basename(f)) - - return zip_name - - except Exception as e: - st.error(f"Error creating zip archive: {str(e)}") - return None - -# ───────────────────────────────────────────────────────── -# 6. OPTIMIZED AI LOOKUP & PROCESSING -# ───────────────────────────────────────────────────────── - -def perform_ai_lookup(q: str, vocal_summary: bool = True, - extended_refs: bool = False, - titles_summary: bool = True, - full_audio: bool = False) -> Tuple[str, Dict[str, float]]: - """Main AI lookup routine with performance optimization.""" - with PerformanceTimer("total_lookup") as total_timer: - timings = {} - - # Add operation controls if not present - if 'operation_controls' not in st.session_state: - st.sidebar.markdown("### 🔧 Operation Controls") - st.session_state['enable_claude'] = st.sidebar.checkbox( - "Enable Claude Search", - value=st.session_state['enable_claude'] - ) - st.session_state['enable_audio'] = st.sidebar.checkbox( - "Generate Audio", - value=st.session_state['enable_audio'] - ) - st.session_state['enable_download'] = st.sidebar.checkbox( - "Create Download Links", - value=st.session_state['enable_download'] - ) - st.session_state['operation_controls'] = True - - result = "" - - # 1. Claude API (if enabled) - if st.session_state['enable_claude']: - with PerformanceTimer("claude_api") as claude_timer: - try: - client = anthropic.Anthropic(api_key=anthropic_key) - response = client.messages.create( - model="claude-3-sonnet-20240229", - max_tokens=1000, - messages=[{"role": "user", "content": q}] - ) - st.write("Claude's reply 🧠:") - st.markdown(response.content[0].text) - result = response.content[0].text - timings['claude_api'] = time.time() - claude_timer.start_time - except Exception as e: - st.error(f"Error with Claude API: {str(e)}") - result = "Error occurred during Claude API call" - timings['claude_api'] = 0 - - # 2. Async save and audio generation - async def process_results(): - with PerformanceTimer("results_processing") as proc_timer: - md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio( - q, result - ) - timings['markdown_save'] = md_time - timings['audio_generation'] = audio_time - - if audio_file and st.session_state['enable_audio']: - st.subheader("📝 Main Response Audio") - st.audio(audio_file) - - if st.session_state['enable_download']: - st.markdown( - create_download_link_with_cache( - audio_file, - st.session_state['audio_format'] - ), - unsafe_allow_html=True - ) - - # Run async operations - asyncio.run(process_results()) - - # 3. 
Arxiv RAG with performance tracking - if st.session_state['enable_claude']: - with PerformanceTimer("arxiv_rag") as rag_timer: - try: - st.write('Running Arxiv RAG with Claude inputs.') - client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern") - refs = client.predict( - q, - 10, - "Semantic Search", - "mistralai/Mixtral-8x7B-Instruct-v0.1", - api_name="/update_with_rag_md" - )[0] - timings['arxiv_rag'] = time.time() - rag_timer.start_time - - # Process papers asynchronously - papers = parse_arxiv_refs(refs) - if papers: - with PerformanceTimer("paper_processing") as paper_timer: - async def process_papers(): - # Create minimal links page - paper_links = create_paper_links_md(papers) - links_file = create_file(q, paper_links, "md") - st.markdown(paper_links) - - # Generate audio and display papers - await create_paper_audio_files(papers, q) - display_papers(papers, get_marquee_settings()) - display_papers_in_sidebar(papers) - - asyncio.run(process_papers()) - timings['paper_processing'] = time.time() - paper_timer.start_time - else: - st.warning("No papers found in the response.") - except Exception as e: - st.error(f"Error during Arxiv RAG: {str(e)}") - timings['arxiv_rag'] = 0 - - return result, timings - -def process_voice_input(text: str): - """Process voice input with enhanced error handling and feedback.""" - if not text: - st.warning("Please provide some input text.") - return - - with PerformanceTimer("voice_processing"): - try: - st.subheader("🔍 Search Results") - result, timings = perform_ai_lookup( - text, - vocal_summary=True, - extended_refs=False, - titles_summary=True, - full_audio=True - ) - - # Save results - md_file, audio_file = save_qa_with_audio(text, result) - - # Display results - st.subheader("📝 Generated Files") - col1, col2 = st.columns(2) - with col1: - st.write(f"📄 Markdown: {os.path.basename(md_file)}") - st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True) - - with col2: - if audio_file: - st.write(f"🎵 Audio: {os.path.basename(audio_file)}") - play_and_download_audio( - audio_file, - st.session_state['audio_format'] - ) - - except Exception as e: - st.error(f"Error processing voice input: {str(e)}") - -# ───────────────────────────────────────────────────────── -# 7. 
SIDEBAR AND FILE HISTORY -# ───────────────────────────────────────────────────────── - -def display_file_history_in_sidebar(): - """Display file history with enhanced organization and filtering.""" - with PerformanceTimer("file_history"): - st.sidebar.markdown("---") - st.sidebar.markdown("### 📂 File History") - - # Gather all files - md_files = glob.glob("*.md") - mp3_files = glob.glob("*.mp3") - wav_files = glob.glob("*.wav") - all_files = md_files + mp3_files + wav_files - - if not all_files: - st.sidebar.write("No files found.") - return - - # Add file management controls - col1, col2 = st.sidebar.columns(2) - with col1: - if st.button("🗑 Delete All"): - try: - for f in all_files: - os.remove(f) - st.session_state.should_rerun = True - st.success("All files deleted successfully.") - except Exception as e: - st.error(f"Error deleting files: {str(e)}") - - with col2: - if st.button("⬇️ Zip All"): - zip_name = create_zip_of_files( - md_files, - mp3_files, - wav_files, - st.session_state.get('last_query', '') - ) - if zip_name: - st.sidebar.markdown( - get_download_link(zip_name, "zip"), - unsafe_allow_html=True - ) - - # Add file filtering options - st.sidebar.markdown("### 🔍 Filter Files") - file_search = st.sidebar.text_input("Search files:", "") - file_type_filter = st.sidebar.multiselect( - "File types:", - ["Markdown", "Audio"], - default=["Markdown", "Audio"] - ) - - # Sort files by modification time - all_files.sort(key=os.path.getmtime, reverse=True) - - # Filter files based on search and type - filtered_files = [] - for f in all_files: - if file_search.lower() in f.lower(): - ext = os.path.splitext(f)[1].lower() - if (("Markdown" in file_type_filter and ext == ".md") or - ("Audio" in file_type_filter and ext in [".mp3", ".wav"])): - filtered_files.append(f) - - # Display filtered files - for f in filtered_files: - fname = os.path.basename(f) - ext = os.path.splitext(fname)[1].lower().strip('.') - emoji = FILE_EMOJIS.get(ext, '📦') - - # Get file metadata - mod_time = datetime.fromtimestamp(os.path.getmtime(f)) - time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S") - file_size = os.path.getsize(f) / 1024 # Size in KB - - with st.sidebar.expander(f"{emoji} {fname}"): - st.write(f"**Modified:** {time_str}") - st.write(f"**Size:** {file_size:.1f} KB") - - if ext == "md": - try: - with open(f, "r", encoding="utf-8") as file_in: - snippet = file_in.read(200).replace("\n", " ") - if len(snippet) == 200: - snippet += "..." - st.write(snippet) - st.markdown( - get_download_link(f, file_type="md"), - unsafe_allow_html=True - ) - except Exception as e: - st.error(f"Error reading markdown file: {str(e)}") - - elif ext in ["mp3", "wav"]: - st.audio(f) - st.markdown( - get_download_link(f, file_type=ext), - unsafe_allow_html=True - ) - - else: - st.markdown(get_download_link(f), unsafe_allow_html=True) - -# ───────────────────────────────────────────────────────── -# 8. MAIN APPLICATION -# ───────────────────────────────────────────────────────── - -def main(): - """Main application entry point with enhanced UI and error handling.""" - try: - # 1. Setup marquee UI in sidebar - update_marquee_settings_ui() - marquee_settings = get_marquee_settings() - - # 2. Display welcome marquee - display_marquee( - st.session_state['marquee_content'], - {**marquee_settings, "font-size": "28px", "lineHeight": "50px"}, - key_suffix="welcome" - ) - - # 3. 
Main action tabs - tab_main = st.radio( - "Action:", - ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"], - horizontal=True - ) - - # Custom component usage - mycomponent = components.declare_component( - "mycomponent", - path="mycomponent" - ) - val = mycomponent(my_input_value="Hello") - - if val: - # Process input value - val_stripped = val.replace('\\n', ' ') - edited_input = st.text_area( - "✏️ Edit Input:", - value=val_stripped, - height=100 - ) - - # Model selection and options - run_option = st.selectbox("Model:", ["Arxiv"]) - col1, col2 = st.columns(2) - - with col1: - #autorun = st.checkbox("⚙ AutoRun", value=True) - autorun = st.checkbox("⚙ AutoRun", value=False) - with col2: - full_audio = st.checkbox("📚 FullAudio", value=False) - - # Check for input changes - input_changed = (val != st.session_state.old_val) - - if autorun and input_changed: - st.session_state.old_val = val - st.session_state.last_query = edited_input - result, timings = perform_ai_lookup( - edited_input, - vocal_summary=True, - extended_refs=False, - titles_summary=True, - full_audio=full_audio - ) - - # Display performance metrics - display_performance_metrics(timings) - - else: - if st.button("▶ Run"): - st.session_state.old_val = val - st.session_state.last_query = edited_input - result, timings = perform_ai_lookup( - edited_input, - vocal_summary=True, - extended_refs=False, - titles_summary=True, - full_audio=full_audio - ) - - # Display performance metrics - display_performance_metrics(timings) - - # Tab-specific content - if tab_main == "🔍 ArXiv": - display_arxiv_tab() - elif tab_main == "🎤 Voice": - display_voice_tab() - elif tab_main == "📸 Media": - display_media_tab() - elif tab_main == "📝 Editor": - display_editor_tab() - - # Display file history - display_file_history_in_sidebar() - - # Apply styling - apply_custom_styling() - - # Check for rerun - if st.session_state.should_rerun: - st.session_state.should_rerun = False - st.rerun() - - except Exception as e: - st.error(f"An error occurred in the main application: {str(e)}") - st.info("Please try refreshing the page or contact support if the issue persists.") - if __name__ == "__main__": - main() \ No newline at end of file + main()