awacke1 committed on
Commit
0b93794
·
verified ·
1 Parent(s): 5ddf8c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +461 -138
app.py CHANGED
@@ -1,4 +1,444 @@
1
- return sorted_groups
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def display_file_manager_sidebar(groups_sorted):
4
  st.sidebar.title("🎵 Audio & Docs Manager")
@@ -97,142 +537,20 @@ def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
97
 
98
  return zip_name
99
 
100
- def get_marquee_settings():
101
- st.sidebar.markdown("### 🎯 Marquee Settings")
102
- cols = st.sidebar.columns(2)
103
- with cols[0]:
104
- bg_color = st.color_picker("🎨 Background", "#1E1E1E", key="bg_color_picker")
105
- text_color = st.color_picker("✍️ Text", "#FFFFFF", key="text_color_picker")
106
- with cols[1]:
107
- font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
108
- duration = st.slider("⏱️ Speed", 1, 20, 10, key="duration_slider")
109
-
110
- return {
111
- "background": bg_color,
112
- "color": text_color,
113
- "font-size": f"{font_size}px",
114
- "animationDuration": f"{duration}s",
115
- "width": "100%",
116
- "lineHeight": "35px"
117
- }
118
-
119
- def display_marquee(text, settings, key_suffix=""):
120
- truncated_text = text[:280] + "..." if len(text) > 280 else text
121
- streamlit_marquee(
122
- content=truncated_text,
123
- **settings,
124
- key=f"marquee_{key_suffix}"
125
- )
126
- st.write("")
127
-
128
- def parse_arxiv_refs(ref_text: str):
129
- if not ref_text:
130
- return []
131
-
132
- results = []
133
- current_paper = {}
134
- lines = ref_text.split('\n')
135
-
136
- for i, line in enumerate(lines):
137
- if line.count('|') == 2:
138
- if current_paper:
139
- results.append(current_paper)
140
- if len(results) >= 20:
141
- break
142
-
143
- try:
144
- header_parts = line.strip('* ').split('|')
145
- date = header_parts[0].strip()
146
- title = header_parts[1].strip()
147
- url_match = re.search(r'(https://arxiv.org/\S+)', line)
148
- url = url_match.group(1) if url_match else f"paper_{len(results)}"
149
-
150
- current_paper = {
151
- 'date': date,
152
- 'title': title,
153
- 'url': url,
154
- 'authors': '',
155
- 'summary': '',
156
- 'content_start': i + 1
157
- }
158
- except Exception as e:
159
- st.warning(f"Error parsing paper header: {str(e)}")
160
- current_paper = {}
161
- continue
162
-
163
- elif current_paper:
164
- if not current_paper['authors']:
165
- current_paper['authors'] = line.strip('* ')
166
- else:
167
- if current_paper['summary']:
168
- current_paper['summary'] += ' ' + line.strip()
169
- else:
170
- current_paper['summary'] = line.strip()
171
-
172
- if current_paper:
173
- results.append(current_paper)
174
-
175
- return results[:20]
176
-
177
- def process_paper_content(paper):
178
- marquee_text = f"📄 {paper['title']} | 👤 {paper['authors'][:100]} | 📝 {paper['summary'][:100]}"
179
- audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
180
- return marquee_text, audio_text
181
-
182
- def create_paper_audio_files(papers, input_question):
183
- for paper in papers:
184
- try:
185
- marquee_text, audio_text = process_paper_content(paper)
186
-
187
- audio_text = clean_for_speech(audio_text)
188
- file_format = st.session_state['audio_format']
189
- audio_file = speak_with_edge_tts(audio_text,
190
- voice=st.session_state['tts_voice'],
191
- file_format=file_format)
192
- paper['full_audio'] = audio_file
193
-
194
- st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
195
- play_and_download_audio(audio_file, file_type=file_format)
196
- paper['marquee_text'] = marquee_text
197
-
198
- except Exception as e:
199
- st.warning(f"Error processing paper {paper['title']}: {str(e)}")
200
- paper['full_audio'] = None
201
- paper['marquee_text'] = None
202
-
203
- def display_papers(papers, marquee_settings):
204
- st.write("## Research Papers")
205
-
206
- papercount = 0
207
- for paper in papers:
208
- papercount += 1
209
- if papercount <= 20:
210
- if paper.get('marquee_text'):
211
- display_marquee(paper['marquee_text'],
212
- marquee_settings,
213
- key_suffix=f"paper_{papercount}")
214
-
215
- with st.expander(f"{papercount}. 📄 {paper['title']}", expanded=True):
216
- st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
217
- st.markdown(f"*{paper['authors']}*")
218
- st.markdown(paper['summary'])
219
-
220
- if paper.get('full_audio'):
221
- st.write("📚 Paper Audio")
222
- file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
223
- if file_ext in ['mp3', 'wav']:
224
- st.audio(paper['full_audio'])
225
-
226
-
227
  def main():
 
 
228
  marquee_settings = get_marquee_settings()
229
 
 
230
  display_marquee(st.session_state['marquee_content'],
231
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
232
  key_suffix="welcome")
233
 
 
234
  groups_sorted = load_files_for_sidebar()
235
 
 
236
  if st.session_state.viewing_prefix:
237
  for group_name, files in groups_sorted:
238
  if group_name == st.session_state.viewing_prefix:
@@ -241,6 +559,7 @@ def main():
241
  with open(f, 'r', encoding='utf-8') as file:
242
  st.session_state['marquee_content'] = file.read()[:280]
243
 
 
244
  st.sidebar.markdown("### 🎤 Voice Settings")
245
  selected_voice = st.sidebar.selectbox(
246
  "Select TTS Voice:",
@@ -248,6 +567,7 @@ def main():
248
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
249
  )
250
 
 
251
  st.sidebar.markdown("### 🔊 Audio Format")
252
  selected_format = st.sidebar.radio(
253
  "Choose Audio Format:",
@@ -262,6 +582,7 @@ def main():
262
  st.session_state['audio_format'] = selected_format.lower()
263
  st.rerun()
264
 
 
265
  tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
266
  horizontal=True)
267
 
@@ -272,7 +593,7 @@ def main():
272
  val_stripped = val.replace('\\n', ' ')
273
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
274
 
275
- run_option = st.selectbox("Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
276
  col1, col2 = st.columns(2)
277
  with col1:
278
  autorun = st.checkbox("⚙ AutoRun", value=True)
@@ -285,15 +606,13 @@ def main():
285
  st.session_state.old_val = val
286
  st.session_state.last_query = edited_input
287
  result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
288
- titles_summary=True, full_audio=full_audio,
289
- marquee_settings=marquee_settings)
290
  else:
291
  if st.button("▶ Run"):
292
  st.session_state.old_val = val
293
  st.session_state.last_query = edited_input
294
  result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
295
- titles_summary=True, full_audio=full_audio,
296
- marquee_settings=marquee_settings)
297
 
298
  if tab_main == "🔍 ArXiv":
299
  st.subheader("🔍 Query ArXiv")
@@ -309,8 +628,9 @@ def main():
309
  if q and st.button("🔍Run"):
310
  st.session_state.last_query = q
311
  result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
312
- titles_summary=titles_summary, full_audio=full_audio,
313
- marquee_settings=marquee_settings)
 
314
 
315
  elif tab_main == "🎤 Voice":
316
  st.subheader("🎤 Voice Input")
@@ -318,10 +638,11 @@ def main():
318
  user_text = user_text.strip().replace('\n', ' ')
319
 
320
  if st.button("📨 Send"):
321
- process_voice_input(user_text, marquee_settings=marquee_settings)
322
 
323
- st.subheader("📜 Chat History")for c in st.session_state.chat_history:
324
- st.write("**You:**", c["user"])
 
325
  st.write("**Response:**", c["claude"])
326
 
327
  elif tab_main == "📸 Media":
@@ -386,8 +707,10 @@ def main():
386
  else:
387
  st.write("Select a file from the sidebar to edit.")
388
 
 
389
  display_file_manager_sidebar(groups_sorted)
390
 
 
391
  if st.session_state.viewing_prefix and any(st.session_state.viewing_prefix == group for group, _ in groups_sorted):
392
  st.write("---")
393
  st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
 
1
+ import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
3
+ import plotly.graph_objects as go
4
+ import streamlit.components.v1 as components
5
+ from datetime import datetime
6
+ from audio_recorder_streamlit import audio_recorder
7
+ from bs4 import BeautifulSoup
8
+ from collections import defaultdict, deque, Counter
9
+ from dotenv import load_dotenv
10
+ from gradio_client import Client
11
+ from huggingface_hub import InferenceClient
12
+ from io import BytesIO
13
+ from PIL import Image
14
+ from PyPDF2 import PdfReader
15
+ from urllib.parse import quote
16
+ from xml.etree import ElementTree as ET
17
+ from openai import OpenAI
18
+ import extra_streamlit_components as stx
19
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
+ import asyncio
21
+ import edge_tts
22
+ from streamlit_marquee import streamlit_marquee
23
+
24
# 🎯 1. Core Configuration & Setup
# Page-level configuration for the Streamlit app.
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲TalkingAIResearcher🏆"
    }
)
# Load variables from a local .env file so the os.getenv() lookups can see them.
load_dotenv()
37
+
38
# Available English voices for Edge TTS.
# The first entry is used as the default `tts_voice` in session state.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural"
]
50
+
51
# Initialize session state variables.
# Each key is seeded only when it is missing, so values already stored in the
# session are never overwritten.
_SESSION_DEFAULTS = {
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "10s",
        "width": "100%",
        "lineHeight": "35px"
    },
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
92
+
93
# 🔑 2. API Setup & Clients
# Keys are read from environment variables first; matching entries in
# Streamlit secrets, when present, replace them.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
# NOTE(review): the environment variable is 'ANTHROPIC_API_KEY_3' while the
# secrets lookup below uses 'ANTHROPIC_API_KEY' - confirm this is intended.
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
# NOTE(review): xai_key is not referenced anywhere in the visible part of the file.
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')
107
+
108
# Constants
# Emoji shown next to a file name, keyed by file extension (without the dot).
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊"
}
114
+
115
def get_central_time():
    """Return the current time as a timezone-aware datetime in 'US/Central'."""
    central = pytz.timezone('US/Central')
    return datetime.now(central)
119
+
120
def format_timestamp_prefix():
    """Return the current Central time formatted as ``MM_dd_yy_hh_mm_AM/PM``.

    The hour is on a 12-hour clock (``%I``) followed by the AM/PM marker
    (``%p``), giving a 17-character string such as ``01_02_25_03_04_PM``.
    """
    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
124
+
125
def initialize_marquee_settings():
    """Seed ``st.session_state['marquee_settings']`` with defaults if it is missing.

    Existing settings are left untouched, so calling this repeatedly is safe.
    """
    if 'marquee_settings' in st.session_state:
        return
    st.session_state['marquee_settings'] = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "10s",
        "width": "100%",
        "lineHeight": "35px"
    }
136
+
137
def get_marquee_settings():
    """Return the marquee settings dict stored in session state.

    The settings are initialised with defaults first when absent. The
    returned object is the stored dict itself, not a copy.
    """
    initialize_marquee_settings()
    return st.session_state['marquee_settings']
141
+
142
def update_marquee_settings_ui():
    """Render the sidebar marquee controls and store their values.

    Draws two colour pickers and two sliders in a two-column sidebar layout,
    then writes the chosen background, text colour, font size and animation
    duration back into ``st.session_state['marquee_settings']``.
    """
    initialize_marquee_settings()
    settings = st.session_state['marquee_settings']

    st.sidebar.markdown("### 🎯 Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        # Colour pickers start from the values currently held in session state.
        bg_color = st.color_picker("🎨 Background",
                                   settings["background"],
                                   key="bg_color_picker")
        text_color = st.color_picker("✍️ Text",
                                     settings["color"],
                                     key="text_color_picker")
    with cols[1]:
        # Sliders use fixed initial values (14 px, 10 s).
        font_size = st.slider("📏 Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed", 1, 20, 10, key="duration_slider")

    settings.update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })
164
+
165
def display_marquee(text, settings, key_suffix=""):
    """Render *text* as a scrolling marquee.

    Args:
        text: Content to show; anything beyond 280 characters is cut and
            replaced by "...".
        settings: Keyword arguments forwarded to ``streamlit_marquee``.
        key_suffix: Appended to ``"marquee_"`` to form the component key.
    """
    truncated_text = text[:280] + "..." if len(text) > 280 else text
    streamlit_marquee(
        content=truncated_text,
        **settings,
        key=f"marquee_{key_suffix}"
    )
    # Empty write emitted after the marquee, as in the original.
    st.write("")
174
+
175
def get_high_info_terms(text: str, top_n=10) -> list:
    """Return the most frequent words and two-word phrases in *text*.

    The text is lower-cased and split into word tokens; hyphenated compounds
    such as "state-of-the-art" stay a single token. Candidates are every
    token plus every pair of adjacent tokens. Single tokens that are stop
    words are discarded; two-word phrases are kept even if they contain one.

    Args:
        text: Text to analyse.
        top_n: Maximum number of terms to return.

    Returns:
        Up to ``top_n`` terms by descending frequency. Terms with equal
        counts keep first-seen order, single words before phrases.
    """
    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with'}
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
    # Bigrams always contain a space, so the stop-word test only ever
    # removes single words.
    counter = Counter(term for term in words + bi_grams if term not in stop_words)
    return [term for term, _ in counter.most_common(top_n)]
183
+
184
def clean_text_for_filename(text: str) -> str:
    """Turn free text into an underscore-joined, filename-friendly slug.

    The text is lower-cased, every character that is not a word character,
    whitespace or hyphen is removed, and only words longer than three
    characters that are not in a small stop list are kept.

    Returns:
        The kept words joined by ``_``, truncated to 200 characters.
    """
    stop_short = {'the', 'and', 'for', 'with', 'this', 'that'}
    text = re.sub(r'[^\w\s-]', '', text.lower())
    filtered = [w for w in text.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(filtered)[:200]
191
+
192
def generate_filename(prompt, response, file_type="md"):
    """Build a timestamped, content-derived file name.

    The name is ``<timestamp prefix>_<terms and snippet>.<file_type>``:
    the top terms from ``get_high_info_terms`` over prompt and response,
    followed by a cleaned snippet of the first 100 characters of each, all
    joined with ``_`` and capped at 150 characters.

    Args:
        prompt: Prompt text.
        response: Response text.
        file_type: Extension to append (without the dot).

    Returns:
        The file name as a string; no file is created.
    """
    prefix = format_timestamp_prefix() + "_"
    combined = (prompt + " " + response).strip()
    info_terms = get_high_info_terms(combined, top_n=10)
    snippet = (prompt[:100] + " " + response[:100]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)
    # Slicing is a no-op for shorter names, so no length check is needed.
    full_name = '_'.join(info_terms + [snippet_cleaned])[:150]
    return f"{prefix}{full_name}.{file_type}"
203
+
204
def create_file(prompt, response, file_type="md"):
    """Write prompt and response to a new UTF-8 text file and return its name.

    The file name comes from ``generate_filename`` applied to the stripped
    prompt and response; the file body is the unstripped prompt, a blank
    line, then the unstripped response.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename
209
+
210
def get_download_link(file, file_type="zip"):
    """Return an HTML ``<a>`` tag that downloads *file* via a base64 data URI.

    Args:
        file: Path of the file to embed; it is read fully into memory.
        file_type: One of "zip", "mp3", "wav" or "md". It selects the MIME
            type and the emoji in the link label; any other value falls back
            to ``application/octet-stream`` with no emoji.

    Returns:
        The anchor element as a string, using the file's base name for both
        the ``download`` attribute and the label.
    """
    # file_type -> (MIME type, label prefix)
    link_styles = {
        "zip": ("application/zip", "📂 "),
        "mp3": ("audio/mpeg", "🎵 "),
        "wav": ("audio/wav", "🔊 "),
        "md": ("text/markdown", "📝 "),
    }
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    mime, icon = link_styles.get(file_type, ("application/octet-stream", ""))
    name = os.path.basename(file)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{icon}Download {name}</a>'
223
+
224
def clean_for_speech(text: str) -> str:
    """Strip markup from *text* so it reads cleanly as speech.

    Newlines and ``</s>`` tokens become spaces, ``#`` characters are removed,
    parenthesised http(s) URLs (the target part of a Markdown link) are
    dropped, and runs of whitespace collapse to a single space.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    for old, new in (("\n", " "), ("</s>", " "), ("#", "")):
        text = text.replace(old, new)
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", text).strip()
231
+
232
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synthesise *text* with Edge TTS and save it to a generated file name.

    Args:
        text: Text to speak; it is passed through ``clean_for_speech`` first.
        voice: Edge TTS voice name.
        rate: Integer rate change, formatted as a signed percentage (e.g. ``+0%``).
        pitch: Integer pitch change, formatted as signed hertz (e.g. ``+0Hz``).
        file_format: Extension used for the output file name.

    Returns:
        The output file name, or ``None`` when the cleaned text is empty.
    """
    # clean_for_speech already strips, so an empty string means nothing to say.
    text = clean_for_speech(text)
    if not text:
        return None
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    # NOTE(review): file_format only affects the extension of the generated
    # name here - confirm edge_tts actually writes that container format.
    out_fn = generate_filename(text, text, file_type=file_format)
    await communicate.save(out_fn)
    return out_fn
242
+
243
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synchronous wrapper around ``edge_tts_generate_audio``.

    Runs the coroutine to completion with ``asyncio.run`` and returns its
    result: the audio file name, or ``None`` when there was nothing to speak.
    """
    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, file_format))
245
+
246
def play_and_download_audio(file_path, file_type="mp3"):
    """Show an audio player and a download link for *file_path*.

    Does nothing when *file_path* is falsy or the file does not exist.

    Args:
        file_path: Path to the audio file.
        file_type: Passed to ``get_download_link`` to pick MIME type and label.
    """
    if not (file_path and os.path.exists(file_path)):
        return
    st.audio(file_path)
    dl_link = get_download_link(file_path, file_type=file_type)
    # The link is raw HTML, so it must be rendered with HTML enabled.
    st.markdown(dl_link, unsafe_allow_html=True)
251
+
252
def save_qa_with_audio(question, answer, voice=None):
    """Save a Q&A pair to a markdown file and generate matching audio.

    Args:
        question: The question text.
        answer: The answer text.
        voice: Edge TTS voice name; defaults to the voice stored in
            ``st.session_state['tts_voice']`` when falsy.

    Returns:
        A ``(md_file, audio_file)`` tuple of file names. ``audio_file`` is
        whatever ``speak_with_edge_tts`` returns for the combined text.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    # Create markdown file
    md_file = create_file(question, answer, "md")

    # Generate audio file in the format currently selected in session state
    audio_text = f"Question: {question}\n\nAnswer: {answer}"
    audio_file = speak_with_edge_tts(
        audio_text,
        voice=voice,
        file_format=st.session_state['audio_format']
    )

    return md_file, audio_file
270
+
271
def process_paper_content(paper):
    """Build the marquee line and the spoken text for one paper.

    Args:
        paper: Mapping with string values under 'title', 'authors' and
            'summary'.

    Returns:
        ``(marquee_text, audio_text)``. The marquee text shows the title plus
        the first 100 characters of authors and summary; the audio text uses
        the full authors and summary.
    """
    title, authors, summary = paper['title'], paper['authors'], paper['summary']
    marquee_text = f"📄 {title} | 👤 {authors[:100]} | 📝 {summary[:100]}"
    audio_text = f"{title} by {authors}. {summary}"
    return marquee_text, audio_text
275
+
276
def create_paper_audio_files(papers, input_question):
    """Generate, display and attach audio for each paper.

    For every paper dict this synthesises speech from its title, authors and
    summary, renders a heading with player and download link, and stores the
    results on the paper under 'full_audio' and 'marquee_text'. If anything
    fails for a paper, a warning is shown and both keys are set to ``None``.

    Args:
        papers: Iterable of paper dicts (mutated in place).
        input_question: Accepted for the caller's convenience; not used here.
    """
    for paper in papers:
        try:
            marquee_text, audio_text = process_paper_content(paper)

            audio_text = clean_for_speech(audio_text)
            file_format = st.session_state['audio_format']
            audio_file = speak_with_edge_tts(audio_text,
                                             voice=st.session_state['tts_voice'],
                                             file_format=file_format)
            paper['full_audio'] = audio_file

            st.write(f"### {FILE_EMOJIS.get(file_format, '')} {os.path.basename(audio_file)}")
            play_and_download_audio(audio_file, file_type=file_format)
            paper['marquee_text'] = marquee_text

        except Exception as e:
            # Best effort: one failing paper must not stop the others.
            st.warning(f"Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['marquee_text'] = None
296
+
297
def display_papers(papers, marquee_settings):
    """Render up to the first 20 papers with marquee, details and audio.

    Args:
        papers: Iterable of paper dicts with 'title', 'date', 'authors' and
            'summary', and optionally 'marquee_text' and 'full_audio'.
        marquee_settings: Settings dict forwarded to ``display_marquee``.
    """
    st.write("## Research Papers")

    for papercount, paper in enumerate(papers, start=1):
        # Only the first 20 papers are shown.
        if papercount > 20:
            break

        if paper.get('marquee_text'):
            display_marquee(paper['marquee_text'],
                            marquee_settings,
                            key_suffix=f"paper_{papercount}")

        with st.expander(f"{papercount}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
            st.markdown(f"*{paper['authors']}*")
            st.markdown(paper['summary'])

            # NOTE(review): the audio section is placed inside the expander;
            # the source's indentation was lost, so confirm this nesting.
            if paper.get('full_audio'):
                st.write("📚 Paper Audio")
                file_ext = os.path.splitext(paper['full_audio'])[1].lower().strip('.')
                if file_ext in ['mp3', 'wav']:
                    st.audio(paper['full_audio'])
319
+
320
def parse_arxiv_refs(ref_text: str):
    """Parse a references text block into a list of paper dicts.

    A line containing exactly two ``|`` characters starts a new paper: the
    text before the first ``|`` is the date and the text between the two is
    the title. The first non-empty line after a header becomes the authors;
    all later lines are joined with spaces into the summary. Lines before the
    first header are ignored.

    Args:
        ref_text: The raw references text; falsy input yields ``[]``.

    Returns:
        At most 20 dicts with keys 'date', 'title', 'url', 'authors',
        'summary' and 'content_start' (index of the line after the header).
        'url' is the first ``https://arxiv.org/...`` match on the header
        line, or ``paper_<n>`` when the header has none.
    """
    max_papers = 20
    if not ref_text:
        return []

    results = []
    current_paper = {}

    for i, line in enumerate(ref_text.split('\n')):
        if line.count('|') != 2:
            # Body line: belongs to the paper being collected, if any.
            if not current_paper:
                continue
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            elif current_paper['summary']:
                current_paper['summary'] += ' ' + line.strip()
            else:
                current_paper['summary'] = line.strip()
            continue

        # Header line: flush the previous paper before starting a new one.
        if current_paper:
            results.append(current_paper)
            if len(results) >= max_papers:
                # current_paper is appended once more after the loop; the
                # final slice removes that duplicate.
                break

        try:
            header_parts = line.strip('* ').split('|')
            url_match = re.search(r'(https://arxiv.org/\S+)', line)
            current_paper = {
                'date': header_parts[0].strip(),
                'title': header_parts[1].strip(),
                'url': url_match.group(1) if url_match else f"paper_{len(results)}",
                'authors': '',
                'summary': '',
                'content_start': i + 1
            }
        except Exception as e:
            st.warning(f"Error parsing paper header: {str(e)}")
            current_paper = {}
            continue

    if current_paper:
        results.append(current_paper)

    return results[:max_papers]
368
+
369
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False):
    """Query the ArXiv RAG Space for *q*, then render, save and voice the result.

    Steps: fetch references and an LLM answer from the Gradio Space, show the
    combined markdown, save it with audio via ``save_qa_with_audio``, play
    the audio, then parse the references into papers and render each one
    with its own audio.

    Args:
        q: The query text.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for the callers' signatures; not used in this function body.

    Returns:
        The combined markdown string (query heading, answer, references).
    """
    start = time.time()

    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # First element of the search endpoint's result is the references text.
    refs = client.predict(q, 20, "Semantic Search",
                          "mistralai/Mixtral-8x7B-Instruct-v0.1",
                          api_name="/update_with_rag_md")[0]
    r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1",
                        True, api_name="/ask_llm")

    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
    st.markdown(result)

    md_file, audio_file = save_qa_with_audio(q, result)

    st.subheader("📝 Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    papers = parse_arxiv_refs(refs)
    if papers:
        create_paper_audio_files(papers, input_question=q)
        display_papers(papers, get_marquee_settings())
    else:
        st.warning("No papers found in the response.")

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
398
+
399
def process_voice_input(text):
    """Run a lookup for spoken/typed *text* and show the generated files.

    Does nothing when *text* is empty. Otherwise it runs
    ``perform_ai_lookup``, saves the question and result with audio, lists
    the resulting file names and renders the audio player.
    """
    if not text:
        return

    st.subheader("🔍 Search Results")
    result = perform_ai_lookup(
        text,
        vocal_summary=True,
        extended_refs=False,
        titles_summary=True,
        full_audio=True
    )

    # NOTE(review): perform_ai_lookup already calls save_qa_with_audio for the
    # same question and result, so the markdown and audio are produced twice.
    md_file, audio_file = save_qa_with_audio(text, result)

    st.subheader("📝 Generated Files")
    st.write(f"Markdown: {md_file}")
    st.write(f"Audio: {audio_file}")
    play_and_download_audio(audio_file, st.session_state['audio_format'])
418
+
419
def load_files_for_sidebar():
    """Group the .md/.mp3/.wav files in the working directory by name prefix.

    ``readme.md`` (any letter case) is excluded. A file whose base name is at
    least as long as a ``MM_dd_yy_hh_mm_AP`` prefix and contains an
    underscore is grouped under its first 17 characters; everything else
    goes into the 'Other' group.

    Returns:
        A list of ``(group_name, [paths])`` tuples sorted by group name in
        descending order, with 'Other' last.
    """
    prefix_length = len("MM_dd_yy_hh_mm_AP")

    md_files = [f for f in glob.glob("*.md")
                if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + glob.glob("*.mp3") + glob.glob("*.wav")

    groups = defaultdict(list)
    for f in all_files:
        basename = os.path.basename(f)
        has_prefix = len(basename) >= prefix_length and '_' in basename
        groups[basename[:prefix_length] if has_prefix else 'Other'].append(f)

    # 'Other' sorts as the empty string so it lands last in reverse order.
    return sorted(groups.items(),
                  key=lambda x: x[0] if x[0] != 'Other' else '',
                  reverse=True)
442
 
443
  def display_file_manager_sidebar(groups_sorted):
444
  st.sidebar.title("🎵 Audio & Docs Manager")
 
537
 
538
  return zip_name
539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  def main():
541
+ # Update marquee settings UI first
542
+ update_marquee_settings_ui()
543
  marquee_settings = get_marquee_settings()
544
 
545
+ # Initial welcome marquee
546
  display_marquee(st.session_state['marquee_content'],
547
  {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
548
  key_suffix="welcome")
549
 
550
+ # Load files for sidebar
551
  groups_sorted = load_files_for_sidebar()
552
 
553
+ # Update marquee content when viewing files
554
  if st.session_state.viewing_prefix:
555
  for group_name, files in groups_sorted:
556
  if group_name == st.session_state.viewing_prefix:
 
559
  with open(f, 'r', encoding='utf-8') as file:
560
  st.session_state['marquee_content'] = file.read()[:280]
561
 
562
+ # Voice Settings
563
  st.sidebar.markdown("### 🎤 Voice Settings")
564
  selected_voice = st.sidebar.selectbox(
565
  "Select TTS Voice:",
 
567
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
568
  )
569
 
570
+ # Audio Format Settings
571
  st.sidebar.markdown("### 🔊 Audio Format")
572
  selected_format = st.sidebar.radio(
573
  "Choose Audio Format:",
 
582
  st.session_state['audio_format'] = selected_format.lower()
583
  st.rerun()
584
 
585
+ # Main Interface
586
  tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
587
  horizontal=True)
588
 
 
593
  val_stripped = val.replace('\\n', ' ')
594
  edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)
595
 
596
+ run_option = st.selectbox("Model:", ["Arxiv"])
597
  col1, col2 = st.columns(2)
598
  with col1:
599
  autorun = st.checkbox("⚙ AutoRun", value=True)
 
606
  st.session_state.old_val = val
607
  st.session_state.last_query = edited_input
608
  result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
609
+ titles_summary=True, full_audio=full_audio)
 
610
  else:
611
  if st.button("▶ Run"):
612
  st.session_state.old_val = val
613
  st.session_state.last_query = edited_input
614
  result = perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
615
+ titles_summary=True, full_audio=full_audio)
 
616
 
617
  if tab_main == "🔍 ArXiv":
618
  st.subheader("🔍 Query ArXiv")
 
628
  if q and st.button("🔍Run"):
629
  st.session_state.last_query = q
630
  result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
631
+ titles_summary=titles_summary, full_audio=full_audio)
632
+ if full_transcript:
633
+ create_file(q, result, "md")
634
 
635
  elif tab_main == "🎤 Voice":
636
  st.subheader("🎤 Voice Input")
 
638
  user_text = user_text.strip().replace('\n', ' ')
639
 
640
  if st.button("📨 Send"):
641
+ process_voice_input(user_text)
642
 
643
+ st.subheader("📜 Chat History")
644
+ for c in st.session_state.chat_history:
645
+ st.write("**You:**", c["user"])
646
  st.write("**Response:**", c["claude"])
647
 
648
  elif tab_main == "📸 Media":
 
707
  else:
708
  st.write("Select a file from the sidebar to edit.")
709
 
710
+ # Display file manager in sidebar
711
  display_file_manager_sidebar(groups_sorted)
712
 
713
+ # Display viewed group content
714
  if st.session_state.viewing_prefix and any(st.session_state.viewing_prefix == group for group, _ in groups_sorted):
715
  st.write("---")
716
  st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")