awacke1 committed (verified)
Commit 05a7287 · 1 Parent(s): 5f86351

Create backup16.app.py

Files changed (1)
  1. backup16.app.py +879 -0
backup16.app.py ADDED
@@ -0,0 +1,879 @@
1
+ import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
3
+ import plotly.graph_objects as go
4
+ import streamlit.components.v1 as components
5
+ from datetime import datetime
6
+ from audio_recorder_streamlit import audio_recorder
7
+ from bs4 import BeautifulSoup
8
+ from collections import defaultdict, deque, Counter
9
+ from dotenv import load_dotenv
10
+ from gradio_client import Client
11
+ from huggingface_hub import InferenceClient
12
+ from io import BytesIO
13
+ from PIL import Image
14
+ from PyPDF2 import PdfReader
15
+ from urllib.parse import quote
16
+ from xml.etree import ElementTree as ET
17
+ from openai import OpenAI
18
+ import extra_streamlit_components as stx
19
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
+ import asyncio
21
+ import edge_tts
22
+ from streamlit_marquee import streamlit_marquee
23
+
24
+ # 🎯 1. Core Configuration & Setup
25
+ st.set_page_config(
26
+ page_title="🚲TalkingAIResearcher🏆",
27
+ page_icon="🚲🏆",
28
+ layout="wide",
29
+ initial_sidebar_state="auto",
30
+ menu_items={
31
+ 'Get Help': 'https://huggingface.co/awacke1',
32
+ 'Report a bug': 'https://huggingface.co/spaces/awacke1',
33
+ 'About': "🚲TalkingAIResearcher🏆"
34
+ }
35
+ )
36
+ load_dotenv()
37
+
38
+ # Add available English voices for Edge TTS
39
+ EDGE_TTS_VOICES = [
40
+ "en-US-AriaNeural",
41
+ "en-US-GuyNeural",
42
+ "en-US-JennyNeural",
43
+ "en-GB-SoniaNeural",
44
+ "en-GB-RyanNeural",
45
+ "en-AU-NatashaNeural",
46
+ "en-AU-WilliamNeural",
47
+ "en-CA-ClaraNeural",
48
+ "en-CA-LiamNeural"
49
+ ]
50
+
51
+ # Initialize session state variables
52
+ if 'marquee_settings' not in st.session_state:
53
+ # Default to 20s animationDuration instead of 10s:
54
+ st.session_state['marquee_settings'] = {
55
+ "background": "#1E1E1E",
56
+ "color": "#FFFFFF",
57
+ "font-size": "14px",
58
+ "animationDuration": "20s", # <- changed to 20s
59
+ "width": "100%",
60
+ "lineHeight": "35px"
61
+ }
62
+
63
+ if 'tts_voice' not in st.session_state:
64
+ st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
65
+ if 'audio_format' not in st.session_state:
66
+ st.session_state['audio_format'] = 'mp3'
67
+ if 'transcript_history' not in st.session_state:
68
+ st.session_state['transcript_history'] = []
69
+ if 'chat_history' not in st.session_state:
70
+ st.session_state['chat_history'] = []
71
+ if 'openai_model' not in st.session_state:
72
+ st.session_state['openai_model'] = "gpt-4o-2024-05-13"
73
+ if 'messages' not in st.session_state:
74
+ st.session_state['messages'] = []
75
+ if 'last_voice_input' not in st.session_state:
76
+ st.session_state['last_voice_input'] = ""
77
+ if 'editing_file' not in st.session_state:
78
+ st.session_state['editing_file'] = None
79
+ if 'edit_new_name' not in st.session_state:
80
+ st.session_state['edit_new_name'] = ""
81
+ if 'edit_new_content' not in st.session_state:
82
+ st.session_state['edit_new_content'] = ""
83
+ if 'viewing_prefix' not in st.session_state:
84
+ st.session_state['viewing_prefix'] = None
85
+ if 'should_rerun' not in st.session_state:
86
+ st.session_state['should_rerun'] = False
87
+ if 'old_val' not in st.session_state:
88
+ st.session_state['old_val'] = None
89
+ if 'last_query' not in st.session_state:
90
+ st.session_state['last_query'] = ""
91
+ if 'marquee_content' not in st.session_state:
92
+ st.session_state['marquee_content'] = "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant"
93
+
94
+ # 🔑 2. API Setup & Clients
95
+ openai_api_key = os.getenv('OPENAI_API_KEY', "")
96
+ anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
97
+ xai_key = os.getenv('xai',"")
98
+ if 'OPENAI_API_KEY' in st.secrets:
99
+ openai_api_key = st.secrets['OPENAI_API_KEY']
100
+ if 'ANTHROPIC_API_KEY' in st.secrets:
101
+ anthropic_key = st.secrets["ANTHROPIC_API_KEY"]
102
+
103
+ openai.api_key = openai_api_key
104
+ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
105
+ HF_KEY = os.getenv('HF_KEY')
106
+ API_URL = os.getenv('API_URL')
107
+
108
+ # Constants
109
+ FILE_EMOJIS = {
110
+ "md": "📝",
111
+ "mp3": "🎵",
112
+ "wav": "🔊"
113
+ }
114
+
115
+ def get_central_time():
116
+ """Get current time in US Central timezone"""
117
+ central = pytz.timezone('US/Central')
118
+ return datetime.now(central)
119
+
120
+ def format_timestamp_prefix():
121
+ """Generate timestamp prefix in format MM_dd_yy_hh_mm_AM/PM"""
122
+ ct = get_central_time()
123
+ return ct.strftime("%m_%d_%y_%I_%M_%p")
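+ # e.g. "01_11_25_07_05_PM" for Jan 11, 2025 7:05 PM Central (illustrative value)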
124
+
125
+ def initialize_marquee_settings():
126
+ """Initialize marquee settings in session state"""
127
+ if 'marquee_settings' not in st.session_state:
128
+ st.session_state['marquee_settings'] = {
129
+ "background": "#1E1E1E",
130
+ "color": "#FFFFFF",
131
+ "font-size": "14px",
132
+ "animationDuration": "20s", # ensure 20s stays
133
+ "width": "100%",
134
+ "lineHeight": "35px"
135
+ }
136
+
137
+ def get_marquee_settings():
138
+ """Get or update marquee settings from session state"""
139
+ initialize_marquee_settings()
140
+ return st.session_state['marquee_settings']
141
+
142
+ def update_marquee_settings_ui():
143
+ """Update marquee settings via UI controls"""
144
+ initialize_marquee_settings()
145
+ st.sidebar.markdown("### 🎯 Marquee Settings")
146
+ cols = st.sidebar.columns(2)
147
+ with cols[0]:
148
+ bg_color = st.color_picker("🎨 Background",
149
+ st.session_state['marquee_settings']["background"],
150
+ key="bg_color_picker")
151
+ text_color = st.color_picker("✍️ Text",
152
+ st.session_state['marquee_settings']["color"],
153
+ key="text_color_picker")
154
+ with cols[1]:
155
+ font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
156
+ # The default is now 20, not 10
157
+ duration = st.slider("⏱️ Speed", 1, 20, 20, key="duration_slider")
158
+
159
+ st.session_state['marquee_settings'].update({
160
+ "background": bg_color,
161
+ "color": text_color,
162
+ "font-size": f"{font_size}px",
163
+ "animationDuration": f"{duration}s"
164
+ })
165
+
166
+ def display_marquee(text, settings, key_suffix=""):
167
+ """Display marquee with given text and settings"""
168
+ truncated_text = text[:280] + "..." if len(text) > 280 else text
169
+ streamlit_marquee(
170
+ content=truncated_text,
171
+ **settings,
172
+ key=f"marquee_{key_suffix}"
173
+ )
174
+ st.write("")
175
+
176
+ def get_high_info_terms(text: str, top_n=10) -> list:
177
+ stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
178
+ words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
179
+ bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
180
+ combined = words + bi_grams
181
+ filtered = [term for term in combined if term not in stop_words and len(term.split()) <= 2]
182
+ counter = Counter(filtered)
183
+ return [term for term, freq in counter.most_common(top_n)]
184
+
185
+ def clean_text_for_filename(text: str) -> str:
186
+ text = text.lower()
187
+ text = re.sub(r'[^\w\s-]', '', text)
188
+ words = text.split()
189
+ stop_short = set(['the', 'and', 'for', 'with', 'this', 'that'])
190
+ filtered = [w for w in words if len(w) > 3 and w not in stop_short]
191
+ return '_'.join(filtered)[:200]
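+ # e.g. clean_text_for_filename("The State of AI!") -> "state" (short and stop words are dropped)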
192
+
193
+
194
+
195
+ def generate_filename(prompt, response, file_type="md", max_length=200):
196
+ """
197
+ Generate a shortened filename by:
198
+ 1. Extracting high-info terms
199
+ 2. Creating a smaller snippet
200
+ 3. Cleaning & joining them
201
+ 4. Truncating if needed
202
+ """
203
+ prefix = format_timestamp_prefix() + "_"
204
+ combined_text = (prompt + " " + response)[:200] # limit huge text input
205
+ info_terms = get_high_info_terms(combined_text, top_n=5)
206
+ snippet = (prompt[:40] + " " + response[:40]).strip()
207
+ snippet_cleaned = clean_text_for_filename(snippet)
208
+ name_parts = info_terms + [snippet_cleaned]
209
+ full_name = '_'.join(name_parts).strip('_')
210
+ leftover_chars = max_length - len(prefix) - len(file_type) - 1
211
+ if len(full_name) > leftover_chars:
212
+ full_name = full_name[:leftover_chars]
213
+
214
+ return f"{prefix}{full_name}.{file_type}"
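+ # Resulting shape (illustrative): "01_11_25_07_05_PM_<top_terms>_<cleaned_snippet>.md",
+ # truncated so the whole name, including the timestamp prefix and extension, stays within max_length.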
215
+
216
+
217
+ def create_file(prompt, response, file_type="md"):
218
+ """
219
+ Create a file using the shortened filename from generate_filename().
220
+ """
221
+ filename = generate_filename(prompt.strip(), response.strip(), file_type)
222
+ with open(filename, 'w', encoding='utf-8') as f:
223
+ f.write(prompt + "\n\n" + response)
224
+ return filename
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+ def generate_filename_old(prompt, response, file_type="md"):
233
+ prefix = format_timestamp_prefix() + "_"
234
+ combined = (prompt + " " + response).strip()
235
+ info_terms = get_high_info_terms(combined, top_n=10)
236
+ snippet = (prompt[:100] + " " + response[:100]).strip()
237
+ snippet_cleaned = clean_text_for_filename(snippet)
238
+ name_parts = info_terms + [snippet_cleaned]
239
+ full_name = '_'.join(name_parts)
240
+ if len(full_name) > 150:
241
+ full_name = full_name[:150]
242
+ return f"{prefix}{full_name}.{file_type}"
243
+
244
+ def create_file_old(prompt, response, file_type="md"):
245
+ filename = generate_filename(prompt.strip(), response.strip(), file_type)
246
+ with open(filename, 'w', encoding='utf-8') as f:
247
+ f.write(prompt + "\n\n" + response)
248
+ return filename
249
+
250
+ def get_download_link(file, file_type="zip"):
251
+ with open(file, "rb") as f:
252
+ b64 = base64.b64encode(f.read()).decode()
253
+ if file_type == "zip":
254
+ return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
255
+ elif file_type == "mp3":
256
+ return f'<a href="data:audio/mpeg;base64,{b64}" download="{os.path.basename(file)}">🎵 Download {os.path.basename(file)}</a>'
257
+ elif file_type == "wav":
258
+ return f'<a href="data:audio/wav;base64,{b64}" download="{os.path.basename(file)}">🔊 Download {os.path.basename(file)}</a>'
259
+ elif file_type == "md":
260
+ return f'<a href="data:text/markdown;base64,{b64}" download="{os.path.basename(file)}">📝 Download {os.path.basename(file)}</a>'
261
+ else:
262
+ return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(file)}">Download {os.path.basename(file)}</a>'
263
+
264
+ def clean_for_speech(text: str) -> str:
265
+ text = text.replace("\n", " ")
266
+ text = text.replace("</s>", " ")
267
+ text = text.replace("#", "")
268
+ text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
269
+ text = re.sub(r"\s+", " ", text).strip()
270
+ return text
271
+
272
+ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
273
+ text = clean_for_speech(text)
274
+ if not text.strip():
275
+ return None
276
+ rate_str = f"{rate:+d}%"
277
+ pitch_str = f"{pitch:+d}Hz"
278
+ communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
279
+ out_fn = generate_filename(text, text, file_type=file_format)
280
+ await communicate.save(out_fn)
281
+ return out_fn
282
+
283
+ def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
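+     """Synchronous wrapper: asyncio.run() spins up a short-lived event loop for each TTS call."""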
284
+ return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
285
+
286
+ def play_and_download_audio(file_path, file_type="mp3"):
287
+ if file_path and os.path.exists(file_path):
288
+ st.audio(file_path)
289
+ dl_link = get_download_link(file_path, file_type=file_type)
290
+ st.markdown(dl_link, unsafe_allow_html=True)
291
+
292
+ def save_qa_with_audio(question, answer, voice=None):
293
+ """Save Q&A to markdown and generate audio"""
294
+ if not voice:
295
+ voice = st.session_state['tts_voice']
296
+
297
+ # Create markdown file; create_file() writes the question and answer directly,
298
+ # so no separate combined_text string is needed here
299
+ md_file = create_file(question, answer, "md")
300
+
301
+ # Generate audio file
302
+ audio_text = f"{question}\n\nAnswer: {answer}"
303
+ audio_file = speak_with_edge_tts(
304
+ audio_text,
305
+ voice=voice,
306
+ file_format=st.session_state['audio_format']
307
+ )
308
+
309
+ return md_file, audio_file
310
+
311
+ def process_paper_content(paper):
312
+ marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:100]} | 📝 {paper['summary'][:500]}"
313
+ audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
314
+ return marquee_text, audio_text
315
+
316
+ def create_paper_audio_files(papers, input_question):
317
+ for paper in papers:
318
+ try:
319
+ marquee_text, audio_text = process_paper_content(paper)
320
+
321
+ audio_text = clean_for_speech(audio_text)
322
+ file_format = st.session_state['audio_format']
323
+ audio_file = speak_with_edge_tts(audio_text,
324
+ voice=st.session_state['tts_voice'],
325
+ file_format=file_format)
326
+ paper['full_audio'] = audio_file
327
+
328
+ st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
329
+ play_and_download_audio(audio_file, file_type=file_format)
330
+ paper['marquee_text'] = marquee_text
331
+
332
+ except Exception as e:
333
+ st.warning(f"Error processing paper {paper['title']}: {str(e)}")
334
+ paper['full_audio'] = None
335
+ paper['marquee_text'] = None
336
+
337
+ def display_papers(papers, marquee_settings):
338
+ st.write("## Research Papers")
339
+
340
+ papercount = 0
341
+ for paper in papers:
342
+ papercount += 1
343
+ if papercount <= 20:
344
+ if paper.get('marquee_text'):
345
+ display_marquee(paper['marquee_text'],
346
+ marquee_settings,
347
+ key_suffix=f"paper_{papercount}")
348
+
349
+ with st.expander(f"{papercount}. 📄 {paper['title']}", expanded=True):
350
+ st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
351
+ st.markdown(f"*{paper['authors']}*")
352
+ st.markdown(paper['summary'])
353
+
354
+ if paper.get('full_audio'):
355
+ st.write("📚 Paper Audio")
356
+ file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
357
+ if file_ext in ['mp3', 'wav']:
358
+ st.audio(paper['full_audio'])
359
+
360
+ def parse_arxiv_refs(ref_text: str):
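+     """
+     Parse the markdown reference list returned by the RAG endpoint into paper dicts.
+     Header lines are expected to contain exactly two '|' separators
+     ("date | title | <text containing an arxiv.org link>"); the next line is treated
+     as the authors and any further lines as the summary. At most 20 papers are kept.
+     """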
361
+ if not ref_text:
362
+ return []
363
+
364
+ results = []
365
+ current_paper = {}
366
+ lines = ref_text.split('\n')
367
+
368
+ for i, line in enumerate(lines):
369
+ if line.count('|') == 2:
370
+ if current_paper:
371
+ results.append(current_paper)
372
+ if len(results) >= 20:
373
+ break
374
+
375
+ try:
376
+ header_parts = line.strip('* ').split('|')
377
+ date = header_parts[0].strip()
378
+ title = header_parts[1].strip()
379
+ url_match = re.search(r'(https://arxiv.org/\S+)', line)
380
+ url = url_match.group(1) if url_match else f"paper_{len(results)}"
381
+
382
+ current_paper = {
383
+ 'date': date,
384
+ 'title': title,
385
+ 'url': url,
386
+ 'authors': '',
387
+ 'summary': '',
388
+ 'content_start': i + 1
389
+ }
390
+ except Exception as e:
391
+ st.warning(f"Error parsing paper header: {str(e)}")
392
+ current_paper = {}
393
+ continue
394
+
395
+ elif current_paper:
396
+ if not current_paper['authors']:
397
+ current_paper['authors'] = line.strip('* ')
398
+ else:
399
+ if current_paper['summary']:
400
+ current_paper['summary'] += ' ' + line.strip()
401
+ else:
402
+ current_paper['summary'] = line.strip()
403
+
404
+ if current_paper:
405
+ results.append(current_paper)
406
+
407
+ return results[:20]
408
+
409
+
410
+ # ---------------------------- Edit 1/11/2025 - add a constitution to the arxiv system template to build a configurable character and personality for IO.
411
+
412
+ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
413
+ titles_summary=True, full_audio=False):
414
+ start = time.time()
415
+
416
+ ai_constitution = """
417
+ You are a talented AI coder and songwriter with a unique ability to explain scientific concepts through music with code easter eggs. Your task is to create a song that not only entertains but also educates listeners about a specific science problem and its potential solutions.
418
+
419
+ 1. First, carefully read and analyze the problem provided:
420
+ <science_problem>
421
+ {{q}}
422
+ </science_problem>
423
+
424
+ 2. Next, consider the style requested:
425
+ <song_style>
426
+ {{SONG_STYLE}}
427
+ </song_style>
428
+
429
+ 3. Follow these steps to create your output:
430
+
431
+ 1. Analyze the problem:
432
+ - Identify the key issues and challenges
433
+ - Note any potential solutions or technologies mentioned, especially in AI
434
+ - Consider how these concepts can be simplified for a general audience
435
+
436
+ 2. Plan your structure; document and enumerate it as a markdown outline with emojis:
437
+ - Decide on a format that fits the style
438
+ - Plan to introduce the problem
439
+ - Highlight key points or solutions
440
+
441
+ 3. Write:
442
+ - Begin with an attention-grabbing opening line
443
+ - Use metaphors and analogies to explain complex concepts
444
+ - Ensure the flow naturally fits the rhythm of the chosen style
445
+ - Include scientific terminology, but explain it in simple terms
446
+
447
+ 4. Incorporate scientific explanations:
448
+ - Weave factual information throughout the verses
449
+ - Use the chorus to reinforce main ideas or solutions
450
+ - Ensure that the scientific content is accurate and up-to-date
451
+
452
+ 5. Match the requested style:
453
+ - Adapt your word choice and phrasing to fit the genre
454
+ - Consider the typical rhythm and structure of this style
455
+ - If applicable, include style-specific elements
456
+
457
+ 6. Review and refine; add useful paper titles, keywords, and descriptions of topics and concepts:
458
+ - Check that it effectively communicates the problem and solutions
459
+ - Ensure it is catchy and memorable
460
+ - Verify it maintains the requested style throughout
461
+ """
462
+
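+ # NOTE: ai_constitution is assembled above but is not sent to the model in this version.
+ # One way to apply it (an assumption, not the original wiring) would be to substitute the
+ # query and pass it as the system prompt, e.g.:
+ #   system_prompt = ai_constitution.replace("{{q}}", q)
+ #   client.messages.create(model=..., system=system_prompt, messages=[...])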
463
+
464
+
465
+
466
+
467
+ # Claude then Arxiv..
468
+
469
+ # Claude:
470
+ client = anthropic.Anthropic(api_key=anthropic_key)
471
+ user_input = q
472
+
473
+ response = client.messages.create(
474
+ model="claude-3-sonnet-20240229",
475
+ max_tokens=1000,
476
+ messages=[
477
+ {"role": "user", "content": user_input}
478
+ ])
479
+
480
+ st.write("Claude's reply 🧠:")
481
+ st.markdown(response.content[0].text)
482
+
483
+ # Render an audio track for the Claude response
484
+ result = response.content[0].text
485
+ # save_qa_with_audio() below writes the markdown file, so a separate
486
+ # create_file(q, result) call here would only produce a duplicate .md
487
+ # Save and produce audio for Claude response
488
+ md_file, audio_file = save_qa_with_audio(q, result)
489
+ st.subheader("📝 Main Response Audio")
490
+ play_and_download_audio(audio_file, st.session_state['audio_format'])
491
+
492
+
493
+
494
+
495
+
496
+ # Arxiv:
497
+ st.write("Arxiv's AI this Evening is Mixtral 8x7B MoE Instruct with 9 English Voices 🧠:")
498
+
499
+ client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
500
+ refs = client.predict(q, 20, "Semantic Search",
501
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
502
+ api_name="/update_with_rag_md")[0]
503
+
504
+ r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
505
+ True, api_name="/ask_llm")
506
+
507
+ result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
508
+
509
+ # Save and produce audio
510
+ md_file, audio_file = save_qa_with_audio(q, result)
511
+
512
+ st.subheader("📝 Main Response Audio")
513
+ play_and_download_audio(audio_file, st.session_state['audio_format'])
514
+
515
+ papers = parse_arxiv_refs(refs)
516
+ if papers:
517
+ create_paper_audio_files(papers, input_question=q)
518
+ display_papers(papers, get_marquee_settings())
519
+ else:
520
+ st.warning("No papers found in the response.")
521
+
522
+ elapsed = time.time()-start
523
+ st.write(f"**Total Elapsed:** {elapsed:.2f} s")
524
+
525
+ return result
526
+
527
+ def process_voice_input(text):
528
+ if not text:
529
+ return
530
+
531
+ st.subheader("🔍 Search Results")
532
+ result = perform_ai_lookup(
533
+ text,
534
+ vocal_summary=True,
535
+ extended_refs=False,
536
+ titles_summary=True,
537
+ full_audio=True
538
+ )
539
+
540
+ md_file, audio_file = save_qa_with_audio(text, result)
541
+
542
+ st.subheader("📝 Generated Files")
543
+ st.write(f"Markdown: {md_file}")
544
+ st.write(f"Audio: {audio_file}")
545
+ play_and_download_audio(audio_file, st.session_state['audio_format'])
546
+
547
+ def load_files_for_sidebar():
548
+ md_files = glob.glob("*.md")
549
+ mp3_files = glob.glob("*.mp3")
550
+ wav_files = glob.glob("*.wav")
551
+
552
+ md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
553
+ all_files = md_files + mp3_files + wav_files
554
+
555
+ groups = defaultdict(list)
556
+ prefix_length = len("MM_dd_yy_hh_mm_AP")
557
+
558
+ for f in all_files:
559
+ basename = os.path.basename(f)
560
+ if len(basename) >= prefix_length and '_' in basename:
561
+ group_name = basename[:prefix_length]
562
+ groups[group_name].append(f)
563
+ else:
564
+ groups['Other'].append(f)
565
+
566
+ sorted_groups = sorted(groups.items(),
567
+ key=lambda x: x[0] if x[0] != 'Other' else '',
568
+ reverse=True)
569
+ return sorted_groups
570
+
571
+ def display_file_manager_sidebar(groups_sorted):
572
+ st.sidebar.title("🎵 Audio & Docs Manager")
573
+
574
+ all_md = []
575
+ all_mp3 = []
576
+ all_wav = []
577
+ for _, files in groups_sorted:
578
+ for f in files:
579
+ if f.endswith(".md"):
580
+ all_md.append(f)
581
+ elif f.endswith(".mp3"):
582
+ all_mp3.append(f)
583
+ elif f.endswith(".wav"):
584
+ all_wav.append(f)
585
+
586
+ col1, col4 = st.sidebar.columns(2)
587
+ with col1:
588
+ if st.button("🗑 Delete All"):
589
+ for f in all_md:
590
+ os.remove(f)
591
+ for f in all_mp3:
592
+ os.remove(f)
593
+ for f in all_wav:
594
+ os.remove(f)
595
+ st.session_state.should_rerun = True
596
+ with col4:
597
+ if st.button("⬇️ Zip All"):
598
+ zip_name = create_zip_of_files(all_md, all_mp3, all_wav, st.session_state.get('last_query', ''))
599
+ if zip_name:
600
+ st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
601
+
602
+ for group_name, files in groups_sorted:
603
+ if group_name == 'Other':
604
+ group_label = 'Other Files'
605
+ else:
606
+ try:
607
+ timestamp_dt = datetime.strptime(group_name, "%m_%d_%y_%I_%M_%p")
608
+ group_label = timestamp_dt.strftime("%b %d, %Y %I:%M %p")
609
+ except ValueError:
610
+ group_label = group_name
611
+
612
+ with st.sidebar.expander(f"📁 {group_label} ({len(files)})", expanded=True):
613
+ c1, c2 = st.columns(2)
614
+ with c1:
615
+ if st.button("👀 View", key=f"view_group_{group_name}"):
616
+ st.session_state.viewing_prefix = group_name
617
+ with c2:
618
+ if st.button("🗑 Del", key=f"del_group_{group_name}"):
619
+ for f in files:
620
+ os.remove(f)
621
+ st.success(f"Deleted group {group_label}!")
622
+ st.session_state.should_rerun = True
623
+
624
+ for f in files:
625
+ fname = os.path.basename(f)
626
+ ext = os.path.splitext(fname)[1].lower()
627
+ emoji = FILE_EMOJIS.get(ext.strip('.'), '')
628
+ mtime = os.path.getmtime(f)
629
+ mtime_str = datetime.fromtimestamp(mtime).strftime("%I:%M:%S %p")
630
+ st.write(f"{emoji} **{fname}** - {mtime_str}")
631
+
632
+ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
633
+ md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
634
+ all_files = md_files + mp3_files + wav_files
635
+ if not all_files:
636
+ return None
637
+
638
+ all_content = []
639
+ for f in all_files:
640
+ if f.endswith('.md'):
641
+ with open(f, 'r', encoding='utf-8') as file:
642
+ all_content.append(file.read())
643
+ elif f.endswith('.mp3') or f.endswith('.wav'):
644
+ basename = os.path.splitext(os.path.basename(f))[0]
645
+ words = basename.replace('_', ' ')
646
+ all_content.append(words)
647
+
648
+ all_content.append(input_question)
649
+ combined_content = " ".join(all_content)
650
+ info_terms = get_high_info_terms(combined_content, top_n=10)
651
+
652
+ timestamp = format_timestamp_prefix()
653
+ name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:10])
654
+ zip_name = f"{timestamp}_{name_text}.zip"
655
+
656
+ with zipfile.ZipFile(zip_name, 'w') as z:
657
+ for f in all_files:
658
+ z.write(f)
659
+
660
+ return zip_name
661
+
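+ # NOTE: the Media tab below calls process_video(), which is not defined anywhere in this
+ # file, so the video "Analyze" button would raise a NameError as written. The helper below
+ # is a minimal sketch of the assumed intent (sample a few frames and return them
+ # base64-encoded for the vision prompt), not the original implementation.
+ def process_video(video_path, seconds_per_frame=10, max_frames=10):
+     """Sample frames from a video and return them as base64-encoded JPEG strings."""
+     frames = []
+     vidcap = cv2.VideoCapture(video_path)
+     fps = vidcap.get(cv2.CAP_PROP_FPS) or 30
+     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
+     step = max(int(fps * seconds_per_frame), 1)
+     for idx in range(0, total_frames, step):
+         if len(frames) >= max_frames:
+             break
+         vidcap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+         success, frame = vidcap.read()
+         if not success:
+             break
+         _, buffer = cv2.imencode('.jpg', frame)
+         frames.append(base64.b64encode(buffer).decode('utf-8'))
+     vidcap.release()
+     return frames
+ 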
662
+ def main():
663
+ # Update marquee settings UI first
664
+ update_marquee_settings_ui()
665
+ marquee_settings = get_marquee_settings()
666
+
667
+ # Initial welcome marquee
668
+ display_marquee(st.session_state['marquee_content'],
669
+ {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
670
+ key_suffix="welcome")
671
+
672
+ # Load files for sidebar
673
+ groups_sorted = load_files_for_sidebar()
674
+
675
+ # Update marquee content when viewing files
676
+ if st.session_state.viewing_prefix:
677
+ for group_name, files in groups_sorted:
678
+ if group_name == st.session_state.viewing_prefix:
679
+ for f in files:
680
+ if f.endswith('.md'):
681
+ with open(f, 'r', encoding='utf-8') as file:
682
+ st.session_state['marquee_content'] = file.read()[:280]
683
+
684
+ # Instead of putting voice settings in the sidebar,
685
+ # we will handle them in the "🎤 Voice" tab below.
686
+
687
+ # Main Interface
688
+ tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
689
+ horizontal=True)
690
+
691
+ mycomponent = components.declare_component("mycomponent", path="mycomponent")
692
+ val = mycomponent(my_input_value="Hello")
693
+
694
+ if val:
695
+ val_stripped = val.replace('\\n', ' ')
696
+ edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
697
+
698
+ run_option = st.selectbox("Model:", ["Arxiv"])
699
+ col1, col2 = st.columns(2)
700
+ with col1:
701
+ autorun = st.checkbox("⚙ AutoRun", value=True)
702
+ with col2:
703
+ full_audio = st.checkbox("📚FullAudio", value=False)
704
+
705
+ input_changed = (val != st.session_state.old_val)
706
+
707
+ if autorun and input_changed:
708
+ st.session_state.old_val = val
709
+ st.session_state.last_query = edited_input
710
+ result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
711
+ titles_summary=True, full_audio=full_audio)
712
+ else:
713
+ if st.button("▶ Run"):
714
+ st.session_state.old_val = val
715
+ st.session_state.last_query = edited_input
716
+ result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
717
+ titles_summary=True, full_audio=full_audio)
718
+
719
+ # --- Tab: ArXiv
720
+ if tab_main == "🔍 ArXiv":
721
+ st.subheader("🔍 Query ArXiv")
722
+ q = st.text_input("🔍 Query:", key="arxiv_query")
723
+
724
+ st.markdown("### 🎛 Options")
725
+ vocal_summary = st.checkbox("🎙ShortAudio", value=True, key="option_vocal_summary")
726
+ extended_refs = st.checkbox("📜LongRefs", value=False, key="option_extended_refs")
727
+ titles_summary = st.checkbox("🔖TitlesOnly", value=True, key="option_titles_summary")
728
+ full_audio = st.checkbox("📚FullAudio", value=False, key="option_full_audio")
729
+ full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")
730
+
731
+ if q and st.button("🔍Run"):
732
+ st.session_state.last_query = q
733
+ result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
734
+ titles_summary=titles_summary, full_audio=full_audio)
735
+ if full_transcript:
736
+ create_file(q, result, "md")
737
+
738
+ # --- Tab: Voice
739
+ elif tab_main == "🎤 Voice":
740
+ st.subheader("🎤 Voice Input")
741
+
742
+ # Move voice selection here:
743
+ st.markdown("### 🎤 Voice Settings")
744
+ selected_voice = st.selectbox(
745
+ "Select TTS Voice:",
746
+ options=EDGE_TTS_VOICES,
747
+ index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
748
+ )
749
+
750
+ # Audio Format Settings below the voice selection
751
+ st.markdown("### 🔊 Audio Format")
752
+ selected_format = st.radio(
753
+ "Choose Audio Format:",
754
+ options=["MP3", "WAV"],
755
+ index=0 if st.session_state['audio_format'] == 'mp3' else 1
756
+ )
757
+
758
+ if selected_voice != st.session_state['tts_voice']:
759
+ st.session_state['tts_voice'] = selected_voice
760
+ st.rerun()
761
+ if selected_format.lower() != st.session_state['audio_format']:
762
+ st.session_state['audio_format'] = selected_format.lower()
763
+ st.rerun()
764
+
765
+ # Now the text area to enter your message
766
+ user_text = st.text_area("💬 Message:", height=100)
767
+ user_text = user_text.strip().replace('\n', ' ')
768
+
769
+ if st.button("📨 Send"):
770
+ process_voice_input(user_text)
771
+
772
+ st.subheader("📜 Chat History")
773
+ for c in st.session_state.chat_history:
774
+ st.write("**You:**", c["user"])
775
+ st.write("**Response:**", c["claude"])
776
+
777
+ # --- Tab: Media
778
+ elif tab_main == "📸 Media":
779
+ st.header("📸 Images & 🎥 Videos")
780
+ tabs = st.tabs(["🖼 Images", "🎥 Video"])
781
+ with tabs[0]:
782
+ imgs = glob.glob("*.png") + glob.glob("*.jpg")
783
+ if imgs:
784
+ c = st.slider("Cols", 1, 5, 3)
785
+ cols = st.columns(c)
786
+ for i, f in enumerate(imgs):
787
+ with cols[i % c]:
788
+ st.image(Image.open(f), use_container_width=True)
789
+ if st.button(f"👀 Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
790
+ response = openai_client.chat.completions.create(
791
+ model=st.session_state["openai_model"],
792
+ messages=[
793
+ {"role": "system", "content": "Analyze the image content."},
794
+ {"role": "user", "content": [
795
+ {"type": "image_url",
796
+ "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(open(f, 'rb').read()).decode()}"}}
797
+ ]}
798
+ ]
799
+ )
800
+ st.markdown(response.choices[0].message.content)
801
+ else:
802
+ st.write("No images found.")
803
+
804
+ with tabs[1]:
805
+ vids = glob.glob("*.mp4")
806
+ if vids:
807
+ for v in vids:
808
+ with st.expander(f"🎥 {os.path.basename(v)}"):
809
+ st.video(v)
810
+ if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
811
+ frames = process_video(v)
812
+ response = openai_client.chat.completions.create(
813
+ model=st.session_state["openai_model"],
814
+ messages=[
815
+ {"role": "system", "content": "Analyze video frames."},
816
+ {"role": "user", "content": [
817
+ {"type": "image_url",
818
+ "image_url": {"url": f"data:image/jpeg;base64,{frame}"}}
819
+ for frame in frames
820
+ ]}
821
+ ]
822
+ )
823
+ st.markdown(response.choices[0].message.content)
824
+ else:
825
+ st.write("No videos found.")
826
+
827
+ # --- Tab: Editor
828
+ elif tab_main == "📝 Editor":
829
+ if st.session_state.editing_file:
830
+ st.subheader(f"Editing: {st.session_state.editing_file}")
831
+ new_text = st.text_area("✏️ Content:", st.session_state.edit_new_content, height=300)
832
+ if st.button("💾 Save"):
833
+ with open(st.session_state.editing_file, 'w', encoding='utf-8') as f:
834
+ f.write(new_text)
835
+ st.success("File updated successfully!")
836
+ st.session_state.should_rerun = True
837
+ st.session_state.editing_file = None
838
+ else:
839
+ st.write("Select a file from the sidebar to edit.")
840
+
841
+ # Display file manager in sidebar
842
+ display_file_manager_sidebar(groups_sorted)
843
+
844
+ # Display viewed group content
845
+ if st.session_state.viewing_prefix and any(st.session_state.viewing_prefix == group for group, _ in groups_sorted):
846
+ st.write("---")
847
+ st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
848
+ for group_name, files in groups_sorted:
849
+ if group_name == st.session_state.viewing_prefix:
850
+ for f in files:
851
+ fname = os.path.basename(f)
852
+ ext = os.path.splitext(fname)[1].lower().strip('.')
853
+ st.write(f"### {fname}")
854
+ if ext == "md":
855
+ content = open(f, 'r', encoding='utf-8').read()
856
+ st.markdown(content)
857
+ elif ext in ["mp3", "wav"]:
858
+ st.audio(f)
859
+ else:
860
+ st.markdown(get_download_link(f), unsafe_allow_html=True)
861
+ break
862
+ if st.button("❌ Close"):
863
+ st.session_state.viewing_prefix = None
864
+ st.session_state['marquee_content'] = "🚀 Welcome to Deep Research Evaluator | 🤖 Your Talking Research Assistant"
865
+
866
+ st.markdown("""
867
+ <style>
868
+ .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
869
+ .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
870
+ .stButton>button { margin-right: 0.5rem; }
871
+ </style>
872
+ """, unsafe_allow_html=True)
873
+
874
+ if st.session_state.should_rerun:
875
+ st.session_state.should_rerun = False
876
+ st.rerun()
877
+
878
+ if __name__ == "__main__":
879
+ main()
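+ 
+ # To launch locally: `streamlit run backup16.app.py`. API keys are read from .env
+ # (OPENAI_API_KEY, ANTHROPIC_API_KEY_3) or from Streamlit secrets (OPENAI_API_KEY,
+ # ANTHROPIC_API_KEY), as set up in the configuration section above.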