awacke1 committed on
Commit
6a8311c
·
verified ·
1 Parent(s): 1c134dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -103
app.py CHANGED
@@ -1,34 +1,48 @@
1
  import streamlit as st
2
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
3
- from datetime import datetime
4
  from audio_recorder_streamlit import audio_recorder
5
  from collections import defaultdict
6
  from dotenv import load_dotenv
7
  from gradio_client import Client
8
  from huggingface_hub import InferenceClient
9
  from PIL import Image
 
10
  from streamlit_marquee import streamlit_marquee
11
  import asyncio
12
  import edge_tts
13
 
 
# Page configuration.
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
)

# Voices offered for text-to-speech, and the emoji shown per file extension.
EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑"}

# Seed st.session_state with defaults; keys that already hold a value are kept.
_SESSION_DEFAULTS = dict(
    tts_voice=EDGE_TTS_VOICES[0],
    audio_format='mp3',
    messages=[],
    chat_history=[],
    transcript_history=[],
    viewing_prefix=None,
    should_rerun=False,
)
for key, default in _SESSION_DEFAULTS.items():
    if key in st.session_state:
        continue
    st.session_state[key] = default
31
 
 
 
 
 
32
  @st.cache_resource
33
  def get_cached_audio_b64(file_path):
34
  with open(file_path, "rb") as f:
@@ -38,8 +52,66 @@ def beautify_filename(filename):
38
  name = os.path.splitext(filename)[0]
39
  return name.replace('_', ' ').replace('.', ' ')
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def load_files_for_sidebar():
42
- """Load and filter files for sidebar by timestamp prefix"""
43
  files = []
44
  for f in glob.glob("*.*"):
45
  basename = os.path.basename(f)
@@ -59,42 +131,6 @@ def load_files_for_sidebar():
59
  key=lambda x: max(os.path.getmtime(f) for f in x[1]),
60
  reverse=True)
61
 
62
def display_marquee_controls():
    """Render the marquee style widgets and return the selected settings.

    Returns:
        dict: style settings keyed "background", "color", "font-size",
        "animationDuration", "width" and "lineHeight".
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left, right = st.sidebar.columns(2)

    # Colours go in the first column, numeric sliders in the second.
    with left:
        background = st.color_picker("🎨 Background", "#1E1E1E")
        foreground = st.color_picker("✍️ Text", "#FFFFFF")
    with right:
        size_px = st.slider("📏 Size", 10, 24, 14)
        seconds = st.slider("⏱️ Speed", 1, 20, 10)

    settings = {}
    settings["background"] = background
    settings["color"] = foreground
    settings["font-size"] = f"{size_px}px"
    settings["animationDuration"] = f"{seconds}s"
    settings["width"] = "100%"
    settings["lineHeight"] = "35px"
    return settings
80
-
81
def get_download_link(file_path, file_type="zip"):
    """Return an HTML anchor that downloads *file_path* through a base64 data URI.

    Args:
        file_path: path of the file to embed; it is read fully into memory.
        file_type: short type tag ("zip", "mp3", "wav", "md") that selects the
            MIME type and the emoji shown in the link text.

    Returns:
        str: an ``<a href="data:...">`` element whose ``download`` attribute is
        the file's base name.
    """
    import html  # local import: `html` is not among the file's top-level imports

    # Real MIME types; the previous "application/<ext>" form produced
    # non-existent types such as application/mp3 and application/md.
    mime_types = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # Unknown tags keep the old "application/<tag>" fallback for compatibility.
    mime = mime_types.get(file_type, f"application/{file_type}")
    emoji = ext_map.get(file_type, '')
    # Escape the name so quotes or angle brackets in it cannot break the markup.
    name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{emoji} Download {name}</a>'
87
-
88
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files, mp3_files, wav_files: lists of file paths to archive.
        query: accepted for the callers' sake; not used by this function.

    Returns:
        str | None: the archive's file name, or None when there is nothing
        to archive.
    """
    members = md_files + mp3_files + wav_files
    if len(members) == 0:
        return None

    # Archive name is "<yymm_HHMM>_archive.zip".
    stamp = datetime.now().strftime("%y%m_%H%M")
    archive_name = f"{stamp}_archive.zip"
    with zipfile.ZipFile(archive_name, 'w') as archive:
        for member in members:
            archive.write(member)
    return archive_name
97
-
98
  def display_file_manager_sidebar(groups_sorted):
99
  st.sidebar.title("📚 File Manager")
100
  all_files = {'md': [], 'mp3': [], 'wav': []}
@@ -153,79 +189,126 @@ def display_file_manager_sidebar(groups_sorted):
153
  height=0
154
  )
155
 
156
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize *text* with edge-tts and save it to a timestamped audio file.

    Args:
        text: text to speak; runs of whitespace are collapsed first.
        voice: edge-tts voice name, also embedded in the output file name.
        file_format: extension used for the output file.

    Returns:
        str | None: the written file name, or None when the text is blank.
    """
    spoken = re.sub(r'\s+', ' ', text).strip()
    if spoken == "":
        return None

    # Output name is "<yymm_HHMM>_<voice>.<ext>".
    stamp = datetime.now().strftime('%y%m_%H%M')
    out_name = f"{stamp}_{voice}.{file_format}"
    await edge_tts.Communicate(spoken, voice).save(out_name)
    return out_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
def parse_arxiv_refs(text):
    """Parse the markdown reference listing into a list of paper dicts.

    A line containing '|' starts a new paper: the first two '|'-separated
    fields become its date and title. The first non-blank line after it is
    taken as the authors; every later non-blank line is appended to the summary.

    Args:
        text: markdown text, one logical item per line.

    Returns:
        list[dict]: dicts with keys 'date', 'title', 'authors', 'summary'
        and 'id' ('' when no arXiv identifier is found on the header line).
    """
    papers = []
    current_paper = None

    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                papers.append(current_paper)
            parts = line.strip('* ').split('|')
            # New-style arXiv ids have 4 digits after the dot before 2015 and
            # 5 digits since; search once instead of twice.
            id_match = re.search(r'(\d{4}\.\d{4,5})', line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
        elif current_paper and line.strip():
            # Blank lines are skipped so they neither consume the authors
            # slot nor pad the summary with stray spaces.
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                current_paper['summary'] = f"{current_paper['summary']} {line.strip()}".strip()

    if current_paper:
        papers.append(current_paper)
    return papers
189
 
190
def perform_ai_lookup(query):
    """Search ArXiv through the hosted Gradio space and show each hit as a marquee.

    Args:
        query: search text forwarded to the "/update_with_rag_md" endpoint.

    Returns:
        list[dict]: the papers parsed from the first element of the response.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )

    papers = parse_arxiv_refs(response[0])
    marquee_settings = display_marquee_controls()

    for index, paper in enumerate(papers):
        content = f"📄 {paper['title']} | 👤 {paper['authors']} | 📝 {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            # The position makes the key unique even when two results share
            # an id or have none; the old random fallback could collide, and
            # Streamlit rejects duplicate element keys.
            key=f"paper_{index}_{paper['id']}"
        )
        st.write("")

    return papers
 
 
 
 
 
 
 
 
211
 
212
  def main():
213
- marquee_settings = display_marquee_controls()
214
 
215
- streamlit_marquee(
216
- content="🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
217
- **marquee_settings,
218
- key="welcome"
219
- )
220
 
221
- tab = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv", "📝 Editor"], horizontal=True)
 
 
 
 
 
 
 
222
 
223
- if tab == "🔍 ArXiv":
224
  query = st.text_input("🔍 Search:")
225
  if query:
226
- papers = perform_ai_lookup(query)
227
- st.write(f"Found {len(papers)} papers")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
 
 
 
 
 
 
 
 
 
 
229
  groups = load_files_for_sidebar()
230
  display_file_manager_sidebar(groups)
231
 
@@ -233,22 +316,20 @@ def main():
233
  st.session_state.should_rerun = False
234
  st.rerun()
235
 
236
- # Condensed sidebar markdown
237
  sidebar_md = """# 📚 Research Papers
238
-
239
- ## 🧠 AGI Levels
240
  L0 ❌ No AI
241
- L1 🌱 ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
242
- L2 💪 Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
243
- L3 🎯 DALL·E [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
244
- L4 🏆 AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
245
- L5 🚀 AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
246
 
247
  ## 🧬 AlphaFold2
248
- [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
249
- 1. 🧬 Input Seq → 2. 🔍 DB Search → 3. 🧩 MSA
250
- 4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
251
- 7. 🎯 3D Predict → 8. ♻️ Recycle x3"""
252
 
253
  st.sidebar.markdown(sidebar_md)
254
 
 
1
  import streamlit as st
2
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
3
+ from datetime import datetime
4
  from audio_recorder_streamlit import audio_recorder
5
  from collections import defaultdict
6
  from dotenv import load_dotenv
7
  from gradio_client import Client
8
  from huggingface_hub import InferenceClient
9
  from PIL import Image
10
+ from openai import OpenAI
11
  from streamlit_marquee import streamlit_marquee
12
  import asyncio
13
  import edge_tts
14
 
15
# --- App configuration ---
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide",
)
load_dotenv()

# Voices offered for text-to-speech, and the emoji shown per file extension.
EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑", "html": "🌐"}

# --- Session state defaults ---
# Only keys missing from st.session_state are seeded; existing values are kept.
state_vars = dict(
    tts_voice=EDGE_TTS_VOICES[0],
    audio_format='mp3',
    messages=[],
    chat_history=[],
    transcript_history=[],
    viewing_prefix=None,
    should_rerun=False,
    editing_mode=False,
    current_file=None,
    file_content=None,
    old_val=None,
    last_query='',
)

for key, default in state_vars.items():
    if key in st.session_state:
        continue
    st.session_state[key] = default

# --- API clients, with keys read from the environment ---
openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
45
+
46
  @st.cache_resource
47
  def get_cached_audio_b64(file_path):
48
  with open(file_path, "rb") as f:
 
52
  name = os.path.splitext(filename)[0]
53
  return name.replace('_', ' ').replace('.', ' ')
54
 
55
def clean_speech_text(text):
    """Return *text* prepared for speech synthesis.

    Removes "</s>" markers, '#' characters and parenthesised http(s) URLs
    (the target part of a markdown link), then collapses whitespace.

    Args:
        text: raw text; None or an empty string yields "".

    Returns:
        str: the cleaned text with single spaces and no leading or trailing
        whitespace.
    """
    if not text:
        return ""
    text = text.replace("</s>", "").replace("#", "")
    text = re.sub(r"\(https?://[^)]+\)", "", text)
    # Normalise whitespace last: the removals above can leave double or
    # leading spaces behind (e.g. "## Title" or "a # b").
    return re.sub(r"\s+", " ", text).strip()
60
+
61
async def edge_tts_generate(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synthesize *text* with edge-tts and save it to a timestamped audio file.

    Args:
        text: text to speak; it is passed through clean_speech_text first.
        voice: edge-tts voice name, also embedded in the output file name.
        rate: speaking-rate change in percent (may be negative).
        pitch: pitch change in Hz (may be negative).
        file_format: extension used for the output file.

    Returns:
        str | None: the written file name, or None when nothing is left to
        speak after cleaning.
    """
    text = clean_speech_text(text)
    if not text:
        return None

    # edge-tts only accepts explicitly signed values such as "+0%" / "-5Hz";
    # the unsigned "0%" / "0Hz" produced before is rejected as invalid.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # NOTE(review): the name has minute resolution, so two clips generated
    # with the same voice within one minute share a file name and the later
    # one overwrites the earlier — confirm whether that is acceptable.
    filename = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
    await communicate.save(filename)
    return filename
68
+
69
def speak_text(text, voice=None, file_format=None):
    """Generate speech audio for *text* and return the audio file name.

    Args:
        text: text to speak; falsy input returns None without any work.
        voice: voice name; falls back to st.session_state['tts_voice'].
        file_format: audio extension; falls back to
            st.session_state['audio_format'].

    Returns:
        str | None: whatever edge_tts_generate returns, or None for falsy text.
    """
    if not text:
        return None

    chosen_voice = voice if voice else st.session_state['tts_voice']
    chosen_format = file_format if file_format else st.session_state['audio_format']
    # Run the async generator to completion from this synchronous caller.
    job = edge_tts_generate(text, chosen_voice, file_format=chosen_format)
    return asyncio.run(job)
74
+
75
def process_audio_file(audio_path):
    """Transcribe an audio file with the "whisper-1" model and record the text.

    The transcript is appended to st.session_state.messages as a user message.

    Args:
        audio_path: path of the audio file to transcribe.

    Returns:
        str: the transcribed text.
    """
    with open(audio_path, "rb") as audio:
        result = openai_client.audio.transcriptions.create(model="whisper-1", file=audio)

    spoken = result.text
    user_message = {"role": "user", "content": spoken}
    st.session_state.messages.append(user_message)
    return spoken
81
+
82
def process_with_gpt(text):
    """Send *text* plus the stored conversation to the chat model and show the reply.

    The user message and the reply are both appended to
    st.session_state.messages, and the exchange is handed to create_file.

    Args:
        text: the user's message; falsy input is ignored.

    Returns:
        str | None: the model's reply, or None when *text* is falsy.
    """
    if not text:
        return None

    conversation = st.session_state.messages
    conversation.append({"role": "user", "content": text})

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        completion = openai_client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=conversation,
            stream=False,
        )
        answer = completion.choices[0].message.content
        st.write(f"GPT-4: {answer}")
        # NOTE(review): create_file is not defined in the visible part of the
        # file — confirm it exists elsewhere.
        create_file(text, answer, "md")
        conversation.append({"role": "assistant", "content": answer})
    return answer
97
+
98
def process_with_claude(text):
    """Send *text* to Claude as a single-turn message and show the reply.

    The exchange is appended to st.session_state.chat_history and handed to
    create_file. Earlier turns are not sent along with the request.

    Args:
        text: the user's message; falsy input is ignored.

    Returns:
        str | None: the model's reply, or None when *text* is falsy.
    """
    if not text:
        return None

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        reply = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=4000,
            messages=[{"role": "user", "content": text}],
        )
        answer = reply.content[0].text
        st.write(f"Claude-3: {answer}")
        # NOTE(review): create_file is not defined in the visible part of the
        # file — confirm it exists elsewhere.
        create_file(text, answer, "md")
        exchange = {"user": text, "claude": answer}
        st.session_state.chat_history.append(exchange)
    return answer
112
+
113
  def load_files_for_sidebar():
114
+ """Load and filter files by timestamp prefix"""
115
  files = []
116
  for f in glob.glob("*.*"):
117
  basename = os.path.basename(f)
 
131
  key=lambda x: max(os.path.getmtime(f) for f in x[1]),
132
  reverse=True)
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  def display_file_manager_sidebar(groups_sorted):
135
  st.sidebar.title("📚 File Manager")
136
  all_files = {'md': [], 'mp3': [], 'wav': []}
 
189
  height=0
190
  )
191
 
192
def perform_arxiv_search(query):
    """Run an ArXiv search and an LLM answer for *query* and present both.

    Calls the "/update_with_rag_md" and "/ask_llm" endpoints of the hosted
    Gradio space, renders the combined markdown, generates audio for the
    parsed papers, and hands the result to create_file.

    Args:
        query: the search text.

    Returns:
        str: the combined markdown that was rendered.
    """
    llm_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    space = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # The first element of the search response is the markdown paper listing.
    search_response = space.predict(
        query, 20, "Semantic Search",
        llm_name,
        api_name="/update_with_rag_md",
    )
    listing = search_response[0]

    answer = space.predict(
        query,
        llm_name,
        True,
        api_name="/ask_llm",
    )

    result = f"### 🔎 {query}\n\n{answer}\n\n{listing}"
    st.markdown(result)

    parsed = parse_arxiv_refs(listing)
    if parsed:
        create_paper_audio(parsed, query)
        # NOTE(review): display_papers and create_file are not defined in the
        # visible part of the file — confirm they exist elsewhere.
        display_papers(parsed)

    create_file(query, result, "md")
    return result
217
 
def parse_arxiv_refs(text):
    """Parse the markdown reference listing into at most 20 paper dicts.

    A line containing '|' starts a new paper: the first two '|'-separated
    fields become its date and title. The first non-blank line after it is
    taken as the authors; every later non-blank line is appended to the summary.

    Args:
        text: markdown text, one logical item per line.

    Returns:
        list[dict]: up to 20 dicts with keys 'date', 'title', 'authors',
        'summary' and 'id' ('' when no arXiv identifier is found on the
        header line).
    """
    papers = []
    current = None

    for line in text.split('\n'):
        if '|' in line:
            if current:
                papers.append(current)
            parts = line.strip('* ').split('|')
            # New-style arXiv ids have 4 digits after the dot before 2015 and
            # 5 digits since; search once instead of twice.
            id_match = re.search(r'(\d{4}\.\d{4,5})', line)
            current = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
        elif current and line.strip():
            # Blank lines are skipped so they neither consume the authors
            # slot nor pad the summary with stray spaces.
            if not current['authors']:
                current['authors'] = line.strip('* ')
            else:
                current['summary'] = f"{current['summary']} {line.strip()}".strip()

    if current:
        papers.append(current)
    return papers[:20]
 
241
 
242
def create_paper_audio(papers, query):
    """Generate and show a spoken clip per paper, then one spoken overview.

    Each paper dict gets an 'audio' entry holding its clip's file name. A
    failure on one paper is shown as a warning and does not stop the rest.

    Args:
        papers: list of paper dicts with 'title', 'authors' and 'summary'.
        query: the search text, mentioned in the spoken overview.
    """
    narrated_titles = []
    for paper in papers:
        try:
            narration = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            fmt = st.session_state['audio_format']
            clip = speak_text(narration, file_format=fmt)
            paper['audio'] = clip
            emoji = FILE_EMOJIS.get(fmt, '')
            st.write(f"### {emoji} {os.path.basename(clip)}")
            st.audio(clip)
            narrated_titles.append(paper['title'])
        except Exception as e:
            st.warning(f"Error generating audio for {paper['title']}: {str(e)}")

    # Nothing narrated means there is nothing to summarise.
    if not narrated_titles:
        return

    joined = '; '.join(narrated_titles)
    summary = f"Found papers about: {joined}. Query was: {query}"
    summary_audio = speak_text(summary)
    if summary_audio:
        st.write("### 📢 Summary")
        st.audio(summary_audio)
262
 
263
  def main():
264
+ st.sidebar.markdown("### 🚲BikeAI🏆 Research Assistant")
265
 
266
+ # Voice settings
267
+ st.sidebar.markdown("### 🎤 Voice Config")
268
+ voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
269
+ index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
270
+ fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
271
 
272
+ if voice != st.session_state['tts_voice']:
273
+ st.session_state['tts_voice'] = voice
274
+ st.rerun()
275
+ if fmt.lower() != st.session_state['audio_format']:
276
+ st.session_state['audio_format'] = fmt.lower()
277
+ st.rerun()
278
+
279
+ mode = st.radio("Mode:", ["🎤 Voice", "🔍 ArXiv", "📝 Editor"], horizontal=True)
280
 
281
+ if mode == "🔍 ArXiv":
282
  query = st.text_input("🔍 Search:")
283
  if query:
284
+ perform_arxiv_search(query)
285
+
286
+ elif mode == "🎤 Voice":
287
+ text = st.text_area("Message:", height=100).strip()
288
+ if st.button("Send"):
289
+ process_with_gpt(text)
290
+
291
+ st.subheader("History")
292
+ tab1, tab2 = st.tabs(["Claude", "GPT-4"])
293
+ with tab1:
294
+ for msg in st.session_state.chat_history:
295
+ st.write("You:", msg["user"])
296
+ st.write("Claude:", msg["claude"])
297
+ with tab2:
298
+ for msg in st.session_state.messages:
299
+ with st.chat_message(msg["role"]):
300
+ st.markdown(msg["content"])
301
 
302
+ elif mode == "📝 Editor":
303
+ if st.session_state.current_file:
304
+ st.subheader(f"Editing: {st.session_state.current_file}")
305
+ new_content = st.text_area("Content:", st.session_state.file_content, height=300)
306
+ if st.button("Save"):
307
+ with open(st.session_state.current_file, 'w') as f:
308
+ f.write(new_content)
309
+ st.success("Saved!")
310
+ st.session_state.should_rerun = True
311
+
312
  groups = load_files_for_sidebar()
313
  display_file_manager_sidebar(groups)
314
 
 
316
  st.session_state.should_rerun = False
317
  st.rerun()
318
 
 
319
  sidebar_md = """# 📚 Research Papers
320
+ ## AGI Levels
 
321
  L0 ❌ No AI
322
+ L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
323
+ L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
324
+ L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
325
+ L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
326
+ L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
327
 
328
  ## 🧬 AlphaFold2
329
+ [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
330
+ 1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
331
+ 4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
332
+ 7. 🎯 3D Predict → 8. ♻️ Recycle"""
333
 
334
  st.sidebar.markdown(sidebar_md)
335