awacke1 committed on
Commit
40ec6ea
·
verified ·
1 Parent(s): 6a8311c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -52
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
3
- from datetime import datetime
4
  from audio_recorder_streamlit import audio_recorder
5
- from collections import defaultdict
6
  from dotenv import load_dotenv
7
  from gradio_client import Client
8
  from huggingface_hub import InferenceClient
@@ -12,14 +12,65 @@ from streamlit_marquee import streamlit_marquee
12
  import asyncio
13
  import edge_tts
14
 
15
- # App Config
16
- st.set_page_config(page_title="🚲TalkingAIResearcher🏆", page_icon="🚲🏆", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  load_dotenv()
18
 
19
- EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
20
- FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑", "html": "🌐"}
 
 
 
 
 
 
 
 
21
 
22
- # Initialize session state
23
  state_vars = {
24
  'tts_voice': EDGE_TTS_VOICES[0],
25
  'audio_format': 'mp3',
@@ -39,10 +90,7 @@ for key, default in state_vars.items():
39
  if key not in st.session_state:
40
  st.session_state[key] = default
41
 
42
- # API clients setup
43
- openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
44
- claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
45
-
46
  @st.cache_resource
47
  def get_cached_audio_b64(file_path):
48
  with open(file_path, "rb") as f:
@@ -52,6 +100,54 @@ def beautify_filename(filename):
52
  name = os.path.splitext(filename)[0]
53
  return name.replace('_', ' ').replace('.', ' ')
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def clean_speech_text(text):
56
  text = re.sub(r'\s+', ' ', text.strip())
57
  text = text.replace("</s>", "").replace("#", "")
@@ -79,39 +175,8 @@ def process_audio_file(audio_path):
79
  st.session_state.messages.append({"role": "user", "content": text})
80
  return text
81
 
82
- def process_with_gpt(text):
83
- if not text: return
84
- st.session_state.messages.append({"role": "user", "content": text})
85
- with st.chat_message("user"): st.markdown(text)
86
- with st.chat_message("assistant"):
87
- response = openai_client.chat.completions.create(
88
- model="gpt-4-turbo-preview",
89
- messages=st.session_state.messages,
90
- stream=False
91
- )
92
- answer = response.choices[0].message.content
93
- st.write(f"GPT-4: {answer}")
94
- create_file(text, answer, "md")
95
- st.session_state.messages.append({"role": "assistant", "content": answer})
96
- return answer
97
-
98
- def process_with_claude(text):
99
- if not text: return
100
- with st.chat_message("user"): st.markdown(text)
101
- with st.chat_message("assistant"):
102
- response = claude_client.messages.create(
103
- model="claude-3-sonnet-20240229",
104
- max_tokens=4000,
105
- messages=[{"role": "user", "content": text}]
106
- )
107
- answer = response.content[0].text
108
- st.write(f"Claude-3: {answer}")
109
- create_file(text, answer, "md")
110
- st.session_state.chat_history.append({"user": text, "claude": answer})
111
- return answer
112
-
113
  def load_files_for_sidebar():
114
- """Load and filter files by timestamp prefix"""
115
  files = []
116
  for f in glob.glob("*.*"):
117
  basename = os.path.basename(f)
@@ -189,6 +254,7 @@ def display_file_manager_sidebar(groups_sorted):
189
  height=0
190
  )
191
 
 
192
  def perform_arxiv_search(query):
193
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
194
  papers = client.predict(
@@ -260,12 +326,57 @@ def create_paper_audio(papers, query):
260
  st.write("### 📢 Summary")
261
  st.audio(summary_audio)
262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  def main():
264
- st.sidebar.markdown("### 🚲BikeAI🏆 Research Assistant")
265
 
266
  # Voice settings
267
- st.sidebar.markdown("### 🎤 Voice Config")
268
- voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
269
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
270
  fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
271
 
@@ -287,7 +398,7 @@ def main():
287
  text = st.text_area("Message:", height=100).strip()
288
  if st.button("Send"):
289
  process_with_gpt(text)
290
-
291
  st.subheader("History")
292
  tab1, tab2 = st.tabs(["Claude", "GPT-4"])
293
  with tab1:
@@ -309,6 +420,7 @@ def main():
309
  st.success("Saved!")
310
  st.session_state.should_rerun = True
311
 
 
312
  groups = load_files_for_sidebar()
313
  display_file_manager_sidebar(groups)
314
 
@@ -316,19 +428,19 @@ def main():
316
  st.session_state.should_rerun = False
317
  st.rerun()
318
 
319
- sidebar_md = """# 📚 Research Papers
320
  ## AGI Levels
321
  L0 ❌ No AI
322
  L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
323
  L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
324
  L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
325
  L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
326
- L5 🚀 AlphaFold [L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
327
 
328
- ## 🧬 AlphaFold2
329
  [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
330
- 1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
331
- 4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
332
  7. 🎯 3D Predict → 8. ♻️ Recycle"""
333
 
334
  st.sidebar.markdown(sidebar_md)
 
1
  import streamlit as st
2
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
3
+ from datetime import datetime
4
  from audio_recorder_streamlit import audio_recorder
5
+ from collections import defaultdict, deque, Counter
6
  from dotenv import load_dotenv
7
  from gradio_client import Client
8
  from huggingface_hub import InferenceClient
 
12
  import asyncio
13
  import edge_tts
14
 
15
+ # Core Configuration
16
+ st.set_page_config(
17
+ page_title="🚲TalkingAIResearcher🏆",
18
+ page_icon="🚲🏆",
19
+ layout="wide",
20
+ initial_sidebar_state="auto",
21
+ menu_items={
22
+ 'Get Help': 'https://huggingface.co/awacke1',
23
+ 'Report a bug': 'https://huggingface.co/spaces/awacke1',
24
+ 'About': "🚲TalkingAIResearcher🏆"
25
+ }
26
+ )
27
+
28
+ # Custom CSS
29
+ st.markdown("""
30
+ <style>
31
+ .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
32
+ .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
33
+ .stButton>button { margin-right: 0.5rem; }
34
+ </style>
35
+ """, unsafe_allow_html=True)
36
+
37
+ # Constants
38
+ EDGE_TTS_VOICES = [
39
+ "en-US-AriaNeural", # Default voice
40
+ "en-US-GuyNeural",
41
+ "en-US-JennyNeural",
42
+ "en-GB-SoniaNeural",
43
+ "en-GB-RyanNeural",
44
+ "en-AU-NatashaNeural",
45
+ "en-AU-WilliamNeural",
46
+ "en-CA-ClaraNeural",
47
+ "en-CA-LiamNeural"
48
+ ]
49
+
50
+ FILE_EMOJIS = {
51
+ "md": "📝",
52
+ "mp3": "🎵",
53
+ "wav": "🔊",
54
+ "txt": "📄",
55
+ "pdf": "📑",
56
+ "html": "🌐"
57
+ }
58
+
59
+ # Load environment variables
60
  load_dotenv()
61
 
62
+ # API Setup
63
+ openai_api_key = os.getenv('OPENAI_API_KEY', "")
64
+ anthropic_key = os.getenv('ANTHROPIC_API_KEY', "")
65
+ if 'OPENAI_API_KEY' in st.secrets:
66
+ openai_api_key = st.secrets['OPENAI_API_KEY']
67
+ if 'ANTHROPIC_API_KEY' in st.secrets:
68
+ anthropic_key = st.secrets["ANTHROPIC_API_KEY"]
69
+
70
+ openai_client = OpenAI(api_key=openai_api_key)
71
+ claude_client = anthropic.Anthropic(api_key=anthropic_key)
72
 
73
+ # Initialize Session State
74
  state_vars = {
75
  'tts_voice': EDGE_TTS_VOICES[0],
76
  'audio_format': 'mp3',
 
90
  if key not in st.session_state:
91
  st.session_state[key] = default
92
 
93
+ # Core Functions
 
 
 
94
  @st.cache_resource
95
  def get_cached_audio_b64(file_path):
96
  with open(file_path, "rb") as f:
 
100
  name = os.path.splitext(filename)[0]
101
  return name.replace('_', ' ').replace('.', ' ')
102
 
103
def display_marquee_controls():
    """Draw the marquee styling widgets in the sidebar and return a style dict.

    The sidebar is split into two columns: colour pickers for background and
    text go in the first, sliders for font size and speed go in the second.

    Returns:
        dict: Style settings under the keys "background", "color",
        "font-size", "animationDuration", "width" and "lineHeight".
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    color_col, sizing_col = st.sidebar.columns(2)

    with color_col:
        background = st.color_picker("🎨 Background", "#1E1E1E")
        foreground = st.color_picker("✍️ Text", "#FFFFFF")

    with sizing_col:
        size_px = st.slider("📏 Size", 10, 24, 14)
        seconds = st.slider("⏱️ Speed", 1, 20, 10)

    # Width and line height are fixed; only the four values above are user-driven.
    style = {"background": background, "color": foreground}
    style["font-size"] = f"{size_px}px"
    style["animationDuration"] = f"{seconds}s"
    style["width"] = "100%"
    style["lineHeight"] = "35px"
    return style
121
+
122
def get_high_info_terms(text: str, top_n=10) -> list:
    """Return the most frequent informative terms (words and bi-grams) in *text*.

    The text is lower-cased and split into words (hyphenated compounds stay
    together). Single words and adjacent-word pairs are counted together.
    Stop words and terms of three characters or fewer are ignored. A bi-gram
    is also dropped when either of its words is a stop word, so filler pairs
    such as "in the" cannot outrank real content.

    Args:
        text: Text to analyse. Empty or falsy input yields an empty list.
        top_n: Maximum number of terms to return.

    Returns:
        A list of ``(term, count)`` tuples, most frequent first.
    """
    if not text:
        return []

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for'}
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())

    # Test the individual words of each pair: the joined string is never a
    # member of the stop-word set, so checking it would let "in the" through.
    bi_grams = [
        f"{first} {second}"
        for first, second in zip(words, words[1:])
        if first not in stop_words and second not in stop_words
    ]

    # Words come before bi-grams so ties keep the same ordering as before.
    candidates = [word for word in words if word not in stop_words] + bi_grams
    filtered = [term for term in candidates if len(term) > 3]
    return Counter(filtered).most_common(top_n)
129
+
130
def generate_filename(prompt, response, file_type="md"):
    """Build a timestamped file name from the key terms of a prompt/response pair.

    The result has the form "<yymm_HHMM>_<terms>.<file_type>". The terms are
    the top five entries reported by get_high_info_terms for the combined
    text, joined with underscores and cut to at most 150 characters.
    """
    stamp = datetime.now().strftime("%y%m_%H%M")
    source_text = " ".join((prompt, response)).strip()
    top_terms = get_high_info_terms(source_text, top_n=5)
    stem = "_".join(term for term, _count in top_terms)[:150]
    return "".join((stamp, "_", stem, ".", file_type))
136
+
137
def create_file(prompt, response, file_type="md"):
    """Save a prompt and its response to a generated file and return its name.

    The name comes from generate_filename, which receives both texts with
    surrounding whitespace stripped. The file itself holds the texts as
    passed in: the prompt, a blank line, then the response, encoded as UTF-8.
    """
    target = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(target, 'w', encoding='utf-8') as out:
        out.write(f"{prompt}\n\n{response}")
    return target
142
+
143
def get_download_link(file_path, file_type="zip"):
    """Return an HTML anchor that downloads *file_path* through a base64 data URI.

    Args:
        file_path: Path of the file whose bytes are embedded in the link.
        file_type: Used as the "application/<file_type>" media type of the
            data URI and to pick the emoji shown in the link text. Unknown
            types get no emoji.

    Returns:
        str: An ``<a>`` element whose ``download`` attribute and visible text
        both carry the file's base name, HTML-escaped.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html  # local import so the block does not depend on a file-level change

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}
    emoji = ext_map.get(file_type, '')

    # The name lands inside an attribute and in element text, so characters
    # such as & < > " must be escaped to keep the markup intact.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:application/{file_type};base64,{b64}" download="{safe_name}">{emoji} Download {safe_name}</a>'
149
+
150
+ # Audio Processing
151
  def clean_speech_text(text):
152
  text = re.sub(r'\s+', ' ', text.strip())
153
  text = text.replace("</s>", "").replace("#", "")
 
175
  st.session_state.messages.append({"role": "user", "content": text})
176
  return text
177
 
178
+ # File Management
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  def load_files_for_sidebar():
 
180
  files = []
181
  for f in glob.glob("*.*"):
182
  basename = os.path.basename(f)
 
254
  height=0
255
  )
256
 
257
+ # ArXiv Integration
258
  def perform_arxiv_search(query):
259
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
260
  papers = client.predict(
 
326
  st.write("### 📢 Summary")
327
  st.audio(summary_audio)
328
 
329
def display_papers(papers):
    """Render up to the first 20 papers, each in its own expanded expander.

    Args:
        papers: Sequence of mappings. Each one is read for the keys 'title',
            'date', 'authors' and 'summary'; an 'audio' entry is optional and
            is played only when present and truthy.
    """
    st.write("## Research Papers")
    for i, paper in enumerate(papers[:20], 1):
        with st.expander(f"{i}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
            st.markdown(f"*{paper['authors']}*")
            # This line previously held a pasted "[previous code] ..." marker
            # that made the function a syntax error.
            st.markdown(paper['summary'])
            if paper.get('audio'):
                st.write("📚 Paper Audio")
                st.audio(paper['audio'])
339
+
340
def process_with_gpt(text):
    """Send *text* plus the stored conversation to the OpenAI chat model.

    The user message and the model's reply are both appended to
    st.session_state.messages and rendered as chat messages, and the exchange
    is saved through create_file as a "md" file.

    Returns:
        The reply text, or None when *text* is empty.
    """
    if not text:
        return None

    user_turn = {"role": "user", "content": text}
    st.session_state.messages.append(user_turn)

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        # The whole message history is sent, not just the latest prompt.
        completion = openai_client.chat.completions.create(
            stream=False,
            messages=st.session_state.messages,
            model="gpt-4-turbo-preview",
        )
        reply = completion.choices[0].message.content
        st.write(f"GPT-4: {reply}")
        create_file(text, reply, "md")
        assistant_turn = {"role": "assistant", "content": reply}
        st.session_state.messages.append(assistant_turn)
    return reply
356
+
357
def process_with_claude(text):
    """Send *text* to the Claude model as a single-turn request and show the reply.

    Only the current prompt is sent. The prompt and reply are rendered as chat
    messages, saved through create_file as a "md" file, and recorded as one
    entry in st.session_state.chat_history.

    Returns:
        The reply text, or None when *text* is empty.
    """
    if not text:
        return None

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        request = [{"role": "user", "content": text}]
        result = claude_client.messages.create(
            messages=request,
            max_tokens=4000,
            model="claude-3-sonnet-20240229",
        )
        # Only the first content item of the response is used.
        reply = result.content[0].text
        st.write(f"Claude-3: {reply}")
        create_file(text, reply, "md")
        exchange = {"user": text, "claude": reply}
        st.session_state.chat_history.append(exchange)
    return reply
372
+
373
+ # Main App
374
  def main():
375
+ st.sidebar.title("🚲 Research Assistant")
376
 
377
  # Voice settings
378
+ st.sidebar.markdown("### 🎤 Voice Config")
379
+ voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
380
  index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
381
  fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
382
 
 
398
  text = st.text_area("Message:", height=100).strip()
399
  if st.button("Send"):
400
  process_with_gpt(text)
401
+
402
  st.subheader("History")
403
  tab1, tab2 = st.tabs(["Claude", "GPT-4"])
404
  with tab1:
 
420
  st.success("Saved!")
421
  st.session_state.should_rerun = True
422
 
423
+ # File management
424
  groups = load_files_for_sidebar()
425
  display_file_manager_sidebar(groups)
426
 
 
428
  st.session_state.should_rerun = False
429
  st.rerun()
430
 
431
+ sidebar_md = """# 📚 Research
432
  ## AGI Levels
433
  L0 ❌ No AI
434
  L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
435
  L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
436
  L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
437
  L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
438
+ L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
439
 
440
+ ## 🧬 AlphaFold2
441
  [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
442
+ 1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
443
+ 4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
444
  7. 🎯 3D Predict → 8. ♻️ Recycle"""
445
 
446
  st.sidebar.markdown(sidebar_md)