Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
3 |
-
from datetime import datetime
|
4 |
from audio_recorder_streamlit import audio_recorder
|
5 |
-
from collections import defaultdict
|
6 |
from dotenv import load_dotenv
|
7 |
from gradio_client import Client
|
8 |
from huggingface_hub import InferenceClient
|
@@ -12,14 +12,65 @@ from streamlit_marquee import streamlit_marquee
|
|
12 |
import asyncio
|
13 |
import edge_tts
|
14 |
|
15 |
-
#
|
16 |
-
st.set_page_config(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
load_dotenv()
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
# Initialize
|
23 |
state_vars = {
|
24 |
'tts_voice': EDGE_TTS_VOICES[0],
|
25 |
'audio_format': 'mp3',
|
@@ -39,10 +90,7 @@ for key, default in state_vars.items():
|
|
39 |
if key not in st.session_state:
|
40 |
st.session_state[key] = default
|
41 |
|
42 |
-
#
|
43 |
-
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
44 |
-
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
|
45 |
-
|
46 |
@st.cache_resource
|
47 |
def get_cached_audio_b64(file_path):
|
48 |
with open(file_path, "rb") as f:
|
@@ -52,6 +100,54 @@ def beautify_filename(filename):
|
|
52 |
name = os.path.splitext(filename)[0]
|
53 |
return name.replace('_', ' ').replace('.', ' ')
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def clean_speech_text(text):
|
56 |
text = re.sub(r'\s+', ' ', text.strip())
|
57 |
text = text.replace("</s>", "").replace("#", "")
|
@@ -79,39 +175,8 @@ def process_audio_file(audio_path):
|
|
79 |
st.session_state.messages.append({"role": "user", "content": text})
|
80 |
return text
|
81 |
|
82 |
-
|
83 |
-
if not text: return
|
84 |
-
st.session_state.messages.append({"role": "user", "content": text})
|
85 |
-
with st.chat_message("user"): st.markdown(text)
|
86 |
-
with st.chat_message("assistant"):
|
87 |
-
response = openai_client.chat.completions.create(
|
88 |
-
model="gpt-4-turbo-preview",
|
89 |
-
messages=st.session_state.messages,
|
90 |
-
stream=False
|
91 |
-
)
|
92 |
-
answer = response.choices[0].message.content
|
93 |
-
st.write(f"GPT-4: {answer}")
|
94 |
-
create_file(text, answer, "md")
|
95 |
-
st.session_state.messages.append({"role": "assistant", "content": answer})
|
96 |
-
return answer
|
97 |
-
|
98 |
-
def process_with_claude(text):
    """Send *text* to Claude as a single-turn prompt and show the reply.

    Renders the prompt and the reply as chat messages, saves the exchange
    to a markdown file through ``create_file`` and records it in
    ``st.session_state.chat_history``.

    Returns the reply text, or ``None`` when *text* is empty.
    """
    if not text:
        return None

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        # Only the current prompt is sent; earlier turns are not included.
        prompt_turn = {"role": "user", "content": text}
        completion = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=4000,
            messages=[prompt_turn],
        )
        reply = completion.content[0].text
        st.write(f"Claude-3: {reply}")
        create_file(text, reply, "md")
        st.session_state.chat_history.append({"user": text, "claude": reply})
    return reply
|
112 |
-
|
113 |
def load_files_for_sidebar():
|
114 |
-
"""Load and filter files by timestamp prefix"""
|
115 |
files = []
|
116 |
for f in glob.glob("*.*"):
|
117 |
basename = os.path.basename(f)
|
@@ -189,6 +254,7 @@ def display_file_manager_sidebar(groups_sorted):
|
|
189 |
height=0
|
190 |
)
|
191 |
|
|
|
192 |
def perform_arxiv_search(query):
|
193 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
194 |
papers = client.predict(
|
@@ -260,12 +326,57 @@ def create_paper_audio(papers, query):
|
|
260 |
st.write("### 📢 Summary")
|
261 |
st.audio(summary_audio)
|
262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
def main():
|
264 |
-
st.sidebar.
|
265 |
|
266 |
# Voice settings
|
267 |
-
st.sidebar.markdown("### 🎤 Voice Config")
|
268 |
-
voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
|
269 |
index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
|
270 |
fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
|
271 |
|
@@ -287,7 +398,7 @@ def main():
|
|
287 |
text = st.text_area("Message:", height=100).strip()
|
288 |
if st.button("Send"):
|
289 |
process_with_gpt(text)
|
290 |
-
|
291 |
st.subheader("History")
|
292 |
tab1, tab2 = st.tabs(["Claude", "GPT-4"])
|
293 |
with tab1:
|
@@ -309,6 +420,7 @@ def main():
|
|
309 |
st.success("Saved!")
|
310 |
st.session_state.should_rerun = True
|
311 |
|
|
|
312 |
groups = load_files_for_sidebar()
|
313 |
display_file_manager_sidebar(groups)
|
314 |
|
@@ -316,19 +428,19 @@ def main():
|
|
316 |
st.session_state.should_rerun = False
|
317 |
st.rerun()
|
318 |
|
319 |
-
sidebar_md = """# 📚 Research
|
320 |
## AGI Levels
|
321 |
L0 ❌ No AI
|
322 |
L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
|
323 |
L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
|
324 |
L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
|
325 |
L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
|
326 |
-
L5 🚀 AlphaFold [
|
327 |
|
328 |
-
## 🧬 AlphaFold2
|
329 |
[2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
|
330 |
-
1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
|
331 |
-
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
|
332 |
7. 🎯 3D Predict → 8. ♻️ Recycle"""
|
333 |
|
334 |
st.sidebar.markdown(sidebar_md)
|
|
|
1 |
import streamlit as st
|
2 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
3 |
+
from datetime import datetime
|
4 |
from audio_recorder_streamlit import audio_recorder
|
5 |
+
from collections import defaultdict, deque, Counter
|
6 |
from dotenv import load_dotenv
|
7 |
from gradio_client import Client
|
8 |
from huggingface_hub import InferenceClient
|
|
|
12 |
import asyncio
|
13 |
import edge_tts
|
14 |
|
15 |
+
# Core Configuration
|
16 |
+
st.set_page_config(
|
17 |
+
page_title="🚲TalkingAIResearcher🏆",
|
18 |
+
page_icon="🚲🏆",
|
19 |
+
layout="wide",
|
20 |
+
initial_sidebar_state="auto",
|
21 |
+
menu_items={
|
22 |
+
'Get Help': 'https://huggingface.co/awacke1',
|
23 |
+
'Report a bug': 'https://huggingface.co/spaces/awacke1',
|
24 |
+
'About': "🚲TalkingAIResearcher🏆"
|
25 |
+
}
|
26 |
+
)
|
27 |
+
|
28 |
+
# Custom CSS
|
29 |
+
st.markdown("""
|
30 |
+
<style>
|
31 |
+
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
|
32 |
+
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
|
33 |
+
.stButton>button { margin-right: 0.5rem; }
|
34 |
+
</style>
|
35 |
+
""", unsafe_allow_html=True)
|
36 |
+
|
37 |
+
# Constants
|
38 |
+
EDGE_TTS_VOICES = [
|
39 |
+
"en-US-AriaNeural", # Default voice
|
40 |
+
"en-US-GuyNeural",
|
41 |
+
"en-US-JennyNeural",
|
42 |
+
"en-GB-SoniaNeural",
|
43 |
+
"en-GB-RyanNeural",
|
44 |
+
"en-AU-NatashaNeural",
|
45 |
+
"en-AU-WilliamNeural",
|
46 |
+
"en-CA-ClaraNeural",
|
47 |
+
"en-CA-LiamNeural"
|
48 |
+
]
|
49 |
+
|
50 |
+
FILE_EMOJIS = {
|
51 |
+
"md": "📝",
|
52 |
+
"mp3": "🎵",
|
53 |
+
"wav": "🔊",
|
54 |
+
"txt": "📄",
|
55 |
+
"pdf": "📑",
|
56 |
+
"html": "🌐"
|
57 |
+
}
|
58 |
+
|
59 |
+
# Load environment variables
|
60 |
load_dotenv()
|
61 |
|
62 |
+
# API Setup
|
63 |
+
def _resolve_api_key(name):
    """Return the API key called *name*.

    The environment variable is read first (empty string when unset); a
    value of the same name in ``st.secrets`` overrides it when present.
    """
    value = os.getenv(name, "")
    try:
        if name in st.secrets:
            value = st.secrets[name]
    except FileNotFoundError:
        # Streamlit raises when no secrets file is configured; the
        # environment value is the intended fallback in that case.
        # NOTE(review): newer Streamlit releases raise a subclass of
        # FileNotFoundError here - confirm against the pinned version.
        pass
    return value


openai_api_key = _resolve_api_key('OPENAI_API_KEY')
anthropic_key = _resolve_api_key('ANTHROPIC_API_KEY')

# NOTE(review): `OpenAI` must be in scope (e.g. `from openai import OpenAI`)
# via import lines not visible in this excerpt - confirm.
openai_client = OpenAI(api_key=openai_api_key)
claude_client = anthropic.Anthropic(api_key=anthropic_key)
|
72 |
|
73 |
+
# Initialize Session State
|
74 |
state_vars = {
|
75 |
'tts_voice': EDGE_TTS_VOICES[0],
|
76 |
'audio_format': 'mp3',
|
|
|
90 |
if key not in st.session_state:
|
91 |
st.session_state[key] = default
|
92 |
|
93 |
+
# Core Functions
|
|
|
|
|
|
|
94 |
@st.cache_resource
|
95 |
def get_cached_audio_b64(file_path):
|
96 |
with open(file_path, "rb") as f:
|
|
|
100 |
name = os.path.splitext(filename)[0]
|
101 |
return name.replace('_', ' ').replace('.', ' ')
|
102 |
|
103 |
+
def display_marquee_controls():
    """Draw the marquee settings widgets and return the chosen style.

    Two colour pickers go in the first sidebar column and two sliders in
    the second. The return value is a dict of style properties built
    from the widget values plus a fixed width and line height.
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    colour_col, sizing_col = st.sidebar.columns(2)

    with colour_col:
        background = st.color_picker("🎨 Background", "#1E1E1E")
        foreground = st.color_picker("✍️ Text", "#FFFFFF")

    with sizing_col:
        size_px = st.slider("📏 Size", 10, 24, 14)
        seconds = st.slider("⏱️ Speed", 1, 20, 10)

    # Keys are inserted in the same order as before so the resulting
    # mapping iterates identically.
    style = {}
    style["background"] = background
    style["color"] = foreground
    style["font-size"] = f"{size_px}px"
    style["animationDuration"] = f"{seconds}s"
    style["width"] = "100%"
    style["lineHeight"] = "35px"
    return style
|
121 |
+
|
122 |
+
def get_high_info_terms(text: str, top_n=10) -> list:
    """Return the *top_n* most frequent terms in *text* with their counts.

    Terms are the lower-cased words (hyphenated compounds count as one
    word) followed by every adjacent word pair joined with a space. A
    term is kept when it is longer than three characters and is not one
    of the listed stop words. The result is a list of ``(term, count)``
    tuples, most frequent first.
    """
    ignored = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for'}

    def keep(term):
        return len(term) > 3 and term not in ignored

    tokens = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())

    # Single words are counted before pairs so that ties keep the same
    # first-seen ordering in the ranked output.
    tally = Counter(filter(keep, tokens))
    pairs = (f"{left} {right}" for left, right in zip(tokens, tokens[1:]))
    tally.update(filter(keep, pairs))
    return tally.most_common(top_n)
|
129 |
+
|
130 |
+
def generate_filename(prompt, response, file_type="md"):
    """Build a file name from a time stamp and the top terms of the text.

    The name is ``<yymm_HHMM>_<terms>.<file_type>``, where the terms are
    the five highest-ranked entries from ``get_high_info_terms`` over the
    prompt and response, joined with underscores and cut to 150
    characters.
    """
    stamp = datetime.now().strftime("%y%m_%H%M")
    source_text = " ".join((prompt, response)).strip()
    ranked = get_high_info_terms(source_text, top_n=5)
    slug = "_".join([term for term, _count in ranked])[:150]
    return "".join([stamp, "_", slug, ".", file_type])
|
136 |
+
|
137 |
+
def create_file(prompt, response, file_type="md"):
    """Write the prompt and response to a newly named file.

    The name comes from ``generate_filename`` applied to the stripped
    prompt and response. The file holds the prompt, a blank line, then
    the response, encoded as UTF-8. Returns the file name.
    """
    target = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(target, 'w', encoding='utf-8') as handle:
        # The unstripped originals are written, not the stripped copies
        # used for naming.
        handle.write(f"{prompt}\n\n{response}")
    return target
|
142 |
+
|
143 |
+
def get_download_link(file_path, file_type="zip"):
    """Return an HTML anchor that downloads *file_path* from a data URI.

    The file is read in binary mode and embedded as base64.

    Args:
        file_path: Path of the file to embed.
        file_type: Short type key (``zip``, ``mp3``, ``wav``, ``md``) that
            selects the emoji and the media type. Unknown keys get no
            emoji and fall back to ``application/<file_type>``.

    Returns:
        The ``<a>`` element as a string.
    """
    import html  # local import: only this helper needs it

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}
    # Audio and markdown are not "application/*" media types; use the
    # registered ones so the browser treats the payload correctly.
    mime_map = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    emoji = ext_map.get(file_type, '')
    mime = mime_map.get(file_type, f"application/{file_type}")

    # Escape the name so quotes or angle brackets in it cannot break the
    # attribute or inject markup.
    name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">{emoji} Download {name}</a>'
|
149 |
+
|
150 |
+
# Audio Processing
|
151 |
def clean_speech_text(text):
|
152 |
text = re.sub(r'\s+', ' ', text.strip())
|
153 |
text = text.replace("</s>", "").replace("#", "")
|
|
|
175 |
st.session_state.messages.append({"role": "user", "content": text})
|
176 |
return text
|
177 |
|
178 |
+
# File Management
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
def load_files_for_sidebar():
|
|
|
180 |
files = []
|
181 |
for f in glob.glob("*.*"):
|
182 |
basename = os.path.basename(f)
|
|
|
254 |
height=0
|
255 |
)
|
256 |
|
257 |
+
# ArXiv Integration
|
258 |
def perform_arxiv_search(query):
|
259 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
260 |
papers = client.predict(
|
|
|
326 |
st.write("### 📢 Summary")
|
327 |
st.audio(summary_audio)
|
328 |
|
329 |
+
def display_papers(papers):
    """Render up to the first 20 papers, each in an expanded expander.

    Each paper is read as a mapping with ``title``, ``date``, ``authors``
    and ``summary`` keys; an audio player is added when the paper has a
    truthy ``audio`` entry.
    """
    st.write("## Research Papers")
    for i, paper in enumerate(papers[:20], 1):
        with st.expander(f"{i}. 📄 {paper['title']}", expanded=True):
            st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
            st.markdown(f"*{paper['authors']}*")
            # This line previously held a pasted "[previous code] ..."
            # placeholder that made the module unparseable.
            st.markdown(paper['summary'])
            if paper.get('audio'):
                st.write("📚 Paper Audio")
                st.audio(paper['audio'])
|
339 |
+
|
340 |
+
def process_with_gpt(text):
    """Send *text* to the OpenAI chat model and show the reply.

    The prompt is appended to ``st.session_state.messages`` before the
    request, so the model receives the whole stored conversation. The
    reply is rendered, saved to a markdown file through ``create_file``
    and appended to the same message list.

    Returns the reply text, or ``None`` when *text* is empty.
    """
    if not text:
        return None

    user_turn = {"role": "user", "content": text}
    st.session_state.messages.append(user_turn)

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        completion = openai_client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=st.session_state.messages,
            stream=False,
        )
        reply = completion.choices[0].message.content
        st.write(f"GPT-4: {reply}")
        create_file(text, reply, "md")
        assistant_turn = {"role": "assistant", "content": reply}
        st.session_state.messages.append(assistant_turn)
    return reply
|
356 |
+
|
357 |
+
def process_with_claude(text):
    """Send *text* to Claude as a single-turn prompt and show the reply.

    Renders the prompt and the reply as chat messages, saves the exchange
    to a markdown file through ``create_file`` and records it in
    ``st.session_state.chat_history``.

    Returns the reply text, or ``None`` when *text* is empty.
    """
    if not text:
        return None

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        # Only the current prompt is sent; earlier turns are not included.
        prompt_turn = {"role": "user", "content": text}
        completion = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=4000,
            messages=[prompt_turn],
        )
        reply = completion.content[0].text
        st.write(f"Claude-3: {reply}")
        create_file(text, reply, "md")
        st.session_state.chat_history.append({"user": text, "claude": reply})
    return reply
|
372 |
+
|
373 |
+
# Main App
|
374 |
def main():
|
375 |
+
st.sidebar.title("🚲 Research Assistant")
|
376 |
|
377 |
# Voice settings
|
378 |
+
st.sidebar.markdown("### 🎤 Voice Config")
|
379 |
+
voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
|
380 |
index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
|
381 |
fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
|
382 |
|
|
|
398 |
text = st.text_area("Message:", height=100).strip()
|
399 |
if st.button("Send"):
|
400 |
process_with_gpt(text)
|
401 |
+
|
402 |
st.subheader("History")
|
403 |
tab1, tab2 = st.tabs(["Claude", "GPT-4"])
|
404 |
with tab1:
|
|
|
420 |
st.success("Saved!")
|
421 |
st.session_state.should_rerun = True
|
422 |
|
423 |
+
# File management
|
424 |
groups = load_files_for_sidebar()
|
425 |
display_file_manager_sidebar(groups)
|
426 |
|
|
|
428 |
st.session_state.should_rerun = False
|
429 |
st.rerun()
|
430 |
|
431 |
+
sidebar_md = """# 📚 Research
|
432 |
## AGI Levels
|
433 |
L0 ❌ No AI
|
434 |
L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
|
435 |
L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
|
436 |
L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
|
437 |
L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
|
438 |
+
L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
|
439 |
|
440 |
+
## 🧬 AlphaFold2
|
441 |
[2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
|
442 |
+
1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
|
443 |
+
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
|
444 |
7. 🎯 3D Predict → 8. ♻️ Recycle"""
|
445 |
|
446 |
st.sidebar.markdown(sidebar_md)
|