Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -1,34 +1,48 @@
|
|
1 |
import streamlit as st
|
2 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
3 |
-
from datetime import datetime
|
4 |
from audio_recorder_streamlit import audio_recorder
|
5 |
from collections import defaultdict
|
6 |
from dotenv import load_dotenv
|
7 |
from gradio_client import Client
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from PIL import Image
|
|
|
10 |
from streamlit_marquee import streamlit_marquee
|
11 |
import asyncio
|
12 |
import edge_tts
|
13 |
|
|
|
14 |
st.set_page_config(page_title="🚲TalkingAIResearcher🏆", page_icon="🚲🏆", layout="wide")
|
|
|
15 |
|
16 |
EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
|
17 |
-
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑"}
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
'tts_voice': EDGE_TTS_VOICES[0],
|
22 |
'audio_format': 'mp3',
|
23 |
'messages': [],
|
24 |
'chat_history': [],
|
25 |
'transcript_history': [],
|
26 |
'viewing_prefix': None,
|
27 |
-
'should_rerun': False
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
if key not in st.session_state:
|
30 |
st.session_state[key] = default
|
31 |
|
|
|
|
|
|
|
|
|
32 |
@st.cache_resource
|
33 |
def get_cached_audio_b64(file_path):
|
34 |
with open(file_path, "rb") as f:
|
@@ -38,8 +52,66 @@ def beautify_filename(filename):
|
|
38 |
name = os.path.splitext(filename)[0]
|
39 |
return name.replace('_', ' ').replace('.', ' ')
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
def load_files_for_sidebar():
|
42 |
-
"""Load and filter files
|
43 |
files = []
|
44 |
for f in glob.glob("*.*"):
|
45 |
basename = os.path.basename(f)
|
@@ -59,42 +131,6 @@ def load_files_for_sidebar():
|
|
59 |
key=lambda x: max(os.path.getmtime(f) for f in x[1]),
|
60 |
reverse=True)
|
61 |
|
62 |
-
def display_marquee_controls():
    """Render the marquee colour/size/speed widgets and return their values.

    Returns:
        dict: style settings keyed by "background", "color", "font-size",
        "animationDuration", "width" and "lineHeight".
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    left_col, right_col = st.sidebar.columns(2)

    # Colour pickers go in the first column.
    with left_col:
        bg_color = st.color_picker("🎨 Background", "#1E1E1E")
        text_color = st.color_picker("✍️ Text", "#FFFFFF")

    # Numeric sliders go in the second column.
    with right_col:
        font_size = st.slider("📏 Size", 10, 24, 14)
        duration = st.slider("⏱️ Speed", 1, 20, 10)

    settings = {}
    settings["background"] = bg_color
    settings["color"] = text_color
    settings["font-size"] = f"{font_size}px"
    settings["animationDuration"] = f"{duration}s"
    settings["width"] = "100%"
    settings["lineHeight"] = "35px"
    return settings
|
80 |
-
|
81 |
-
def get_download_link(file_path, file_type="zip"):
    """Return an HTML ``<a>`` tag that downloads *file_path* via a base64 data URI.

    Args:
        file_path: Path of the file to embed; it is read fully into memory.
        file_type: Short type key ("zip", "mp3", "wav", "md") selecting the
            MIME type and the emoji shown in the link text.

    Returns:
        str: the anchor markup.

    Raises:
        OSError: if *file_path* cannot be opened.
    """
    import html

    # Registered media types; "application/mp3" and friends are not real types.
    mime_types = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # Unknown keys keep the previous "application/<type>" fallback.
    mime = mime_types.get(file_type, f"application/{file_type}")
    emoji = ext_map.get(file_type, '')
    # The name lands in an attribute and in element text, so escape it.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    return (
        f'<a href="data:{mime};base64,{b64}" download="{safe_name}">'
        f'{emoji} Download {safe_name}</a>'
    )
|
87 |
-
|
88 |
-
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files: Paths of markdown files to include.
        mp3_files: Paths of MP3 files to include.
        wav_files: Paths of WAV files to include.
        query: Accepted for caller compatibility; not used by this function.

    Returns:
        The archive filename (``<yymm_HHMM>_archive.zip``), or ``None`` when
        there is nothing to archive.
    """
    # Drop duplicates (order kept) and paths that no longer exist, so a stale
    # entry cannot abort the write halfway and leave a partial archive behind.
    candidates = list(md_files) + list(mp3_files) + list(wav_files)
    all_files = [f for f in dict.fromkeys(candidates) if os.path.isfile(f)]
    if not all_files:
        return None

    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    # Deflate shrinks the text files; the default mode stores entries uncompressed.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as z:
        for f in all_files:
            z.write(f)
    return zip_name
|
97 |
-
|
98 |
def display_file_manager_sidebar(groups_sorted):
|
99 |
st.sidebar.title("📚 File Manager")
|
100 |
all_files = {'md': [], 'mp3': [], 'wav': []}
|
@@ -153,79 +189,126 @@ def display_file_manager_sidebar(groups_sorted):
|
|
153 |
height=0
|
154 |
)
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
def parse_arxiv_refs(text):
|
165 |
papers = []
|
166 |
-
|
167 |
|
168 |
for line in text.split('\n'):
|
169 |
if '|' in line:
|
170 |
-
if
|
171 |
-
papers.append(current_paper)
|
172 |
parts = line.strip('* ').split('|')
|
173 |
-
|
174 |
'date': parts[0].strip(),
|
175 |
'title': parts[1].strip(),
|
176 |
'authors': '',
|
177 |
'summary': '',
|
178 |
'id': re.search(r'(\d{4}\.\d{5})', line).group(1) if re.search(r'(\d{4}\.\d{5})', line) else ''
|
179 |
}
|
180 |
-
elif
|
181 |
-
if not
|
182 |
-
|
183 |
else:
|
184 |
-
|
185 |
|
186 |
-
if
|
187 |
-
|
188 |
-
return papers
|
189 |
|
190 |
-
def
|
191 |
-
|
192 |
-
response = client.predict(
|
193 |
-
query, 20, "Semantic Search",
|
194 |
-
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
195 |
-
api_name="/update_with_rag_md"
|
196 |
-
)
|
197 |
-
|
198 |
-
papers = parse_arxiv_refs(response[0])
|
199 |
-
marquee_settings = display_marquee_controls()
|
200 |
-
|
201 |
for paper in papers:
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
def main():
|
213 |
-
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
)
|
220 |
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
-
if
|
224 |
query = st.text_input("🔍 Search:")
|
225 |
if query:
|
226 |
-
|
227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
groups = load_files_for_sidebar()
|
230 |
display_file_manager_sidebar(groups)
|
231 |
|
@@ -233,22 +316,20 @@ def main():
|
|
233 |
st.session_state.should_rerun = False
|
234 |
st.rerun()
|
235 |
|
236 |
-
# Condensed sidebar markdown
|
237 |
sidebar_md = """# 📚 Research Papers
|
238 |
-
|
239 |
-
## 🧠 AGI Levels
|
240 |
L0 ❌ No AI
|
241 |
-
L1 🌱 ChatGPT
|
242 |
-
L2 💪 Watson [2201.
|
243 |
-
L3 🎯 DALL·E [2204.
|
244 |
-
L4 🏆 AlphaGo [1712.
|
245 |
-
L5 🚀 AlphaFold [2203.
|
246 |
|
247 |
## 🧬 AlphaFold2
|
248 |
-
[2203.
|
249 |
-
1. 🧬 Input
|
250 |
-
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
|
251 |
-
7. 🎯 3D Predict → 8. ♻️ Recycle
|
252 |
|
253 |
st.sidebar.markdown(sidebar_md)
|
254 |
|
|
|
1 |
import streamlit as st
|
2 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
3 |
+
from datetime import datetime
|
4 |
from audio_recorder_streamlit import audio_recorder
|
5 |
from collections import defaultdict
|
6 |
from dotenv import load_dotenv
|
7 |
from gradio_client import Client
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from PIL import Image
|
10 |
+
from openai import OpenAI
|
11 |
from streamlit_marquee import streamlit_marquee
|
12 |
import asyncio
|
13 |
import edge_tts
|
14 |
|
15 |
+
# App Config
|
16 |
st.set_page_config(page_title="🚲TalkingAIResearcher🏆", page_icon="🚲🏆", layout="wide")
|
17 |
+
load_dotenv()
|
18 |
|
19 |
EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]
|
20 |
+
FILE_EMOJIS = {"md": "📝", "mp3": "🎵", "wav": "🔊", "txt": "📄", "pdf": "📑", "html": "🌐"}
|
21 |
|
22 |
+
# Initialize session state
# Defaults for every session key used by the app. A key is only seeded when
# it is absent, so values already present in the session are left untouched.
state_vars = dict([
    ('tts_voice', EDGE_TTS_VOICES[0]),
    ('audio_format', 'mp3'),
    ('messages', []),
    ('chat_history', []),
    ('transcript_history', []),
    ('viewing_prefix', None),
    ('should_rerun', False),
    ('editing_mode', False),
    ('current_file', None),
    ('file_content', None),
    ('old_val', None),
    ('last_query', ''),
])

for key, default in state_vars.items():
    if key in st.session_state:
        continue
    st.session_state[key] = default

# API clients setup
# Keys come from the environment (populated by load_dotenv above when a .env exists).
openai_client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
)
claude_client = anthropic.Anthropic(
    api_key=os.getenv('ANTHROPIC_API_KEY'),
)
|
45 |
+
|
46 |
@st.cache_resource
|
47 |
def get_cached_audio_b64(file_path):
|
48 |
with open(file_path, "rb") as f:
|
|
|
52 |
name = os.path.splitext(filename)[0]
|
53 |
return name.replace('_', ' ').replace('.', ' ')
|
54 |
|
55 |
+
def clean_speech_text(text):
    """Strip markup that should not be read aloud and normalise whitespace.

    Removes ``</s>`` tokens, ``#`` characters and parenthesised http(s) URLs
    (the target part of a markdown link), then collapses whitespace runs.

    Args:
        text: Raw text; ``None`` or an empty string is accepted.

    Returns:
        str: the cleaned text, ``""`` when nothing speakable remains.
    """
    if not text:
        return ""
    text = text.replace("</s>", "").replace("#", "")
    text = re.sub(r"\(https?://[^)]+\)", "", text)
    # Normalise whitespace last: the removals above can leave leading, trailing
    # or doubled spaces, and a whitespace-only result must come out as "".
    return re.sub(r"\s+", " ", text).strip()
|
60 |
+
|
61 |
+
async def edge_tts_generate(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synthesize *text* with edge-tts and save it in the working directory.

    Args:
        text: Raw text; it is passed through ``clean_speech_text`` first.
        voice: edge-tts voice name.
        rate: Speaking-rate offset in percent (may be negative).
        pitch: Pitch offset in Hz (may be negative).
        file_format: Extension used for the output filename.

    Returns:
        The generated filename, or ``None`` when no text is left after cleaning.
    """
    text = clean_speech_text(text)
    if not text:
        return None

    # edge-tts expects explicitly signed offsets such as "+0%" / "-10Hz";
    # an unsigned "0%" fails its argument validation.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # The leading stamp only encodes year/month/hour/minute, so several clips
    # produced in the same minute with the same voice would share one name and
    # overwrite each other. The seconds+microseconds suffix keeps them apart
    # while leaving the leading stamp unchanged.
    now = datetime.now()
    filename = f"{now.strftime('%y%m_%H%M')}_{voice}_{now.strftime('%S%f')}.{file_format}"
    await communicate.save(filename)
    return filename
|
68 |
+
|
69 |
+
def speak_text(text, voice=None, file_format=None):
    """Synchronously turn *text* into an audio file.

    Args:
        text: Text to speak; falsy input short-circuits to ``None``.
        voice: Voice name; falls back to ``st.session_state['tts_voice']``.
        file_format: Audio extension; falls back to
            ``st.session_state['audio_format']``.

    Returns:
        Whatever ``edge_tts_generate`` returns, or ``None`` for falsy *text*.
    """
    if text:
        chosen_voice = voice if voice else st.session_state['tts_voice']
        chosen_format = file_format if file_format else st.session_state['audio_format']
        # Drive the async generator to completion from synchronous code.
        job = edge_tts_generate(text, chosen_voice, file_format=chosen_format)
        return asyncio.run(job)
    return None
|
74 |
+
|
75 |
+
def process_audio_file(audio_path):
    """Transcribe an audio file with the "whisper-1" model and log it as a user message.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcript text.
    """
    with open(audio_path, "rb") as audio_handle:
        result = openai_client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_handle,
        )
    spoken = result.text
    # Record the transcript in the chat message history as a user turn.
    user_turn = {"role": "user", "content": spoken}
    st.session_state.messages.append(user_turn)
    return spoken
|
81 |
+
|
82 |
+
def process_with_gpt(text):
    """Send *text* to the OpenAI chat model with the running history and show the reply.

    The user turn and the assistant reply are both appended to
    ``st.session_state.messages``; the exchange is also handed to
    ``create_file`` (defined elsewhere — not visible in this view).

    Args:
        text: The user's message; falsy input is ignored.

    Returns:
        The assistant's reply text, or ``None`` when *text* is falsy.
    """
    if not text:
        return

    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        # The full history (including the turn just appended) is sent.
        completion = openai_client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=st.session_state.messages,
            stream=False,
        )
        first_choice = completion.choices[0]
        answer = first_choice.message.content
        st.write(f"GPT-4: {answer}")
        create_file(text, answer, "md")
        assistant_turn = {"role": "assistant", "content": answer}
        st.session_state.messages.append(assistant_turn)
    return answer
|
97 |
+
|
98 |
+
def process_with_claude(text):
    """Send *text* to Claude as a single-turn request and show the reply.

    Only the current message is sent (no prior history). The exchange is
    appended to ``st.session_state.chat_history`` and handed to
    ``create_file`` (defined elsewhere — not visible in this view).

    Args:
        text: The user's message; falsy input is ignored.

    Returns:
        The reply text, or ``None`` when *text* is falsy.
    """
    if not text:
        return

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        single_turn = [{"role": "user", "content": text}]
        reply = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=4000,
            messages=single_turn,
        )
        # The reply text is read from the first content block.
        answer = reply.content[0].text
        st.write(f"Claude-3: {answer}")
        create_file(text, answer, "md")
        exchange = {"user": text, "claude": answer}
        st.session_state.chat_history.append(exchange)
    return answer
|
112 |
+
|
113 |
def load_files_for_sidebar():
|
114 |
+
"""Load and filter files by timestamp prefix"""
|
115 |
files = []
|
116 |
for f in glob.glob("*.*"):
|
117 |
basename = os.path.basename(f)
|
|
|
131 |
key=lambda x: max(os.path.getmtime(f) for f in x[1]),
|
132 |
reverse=True)
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
def display_file_manager_sidebar(groups_sorted):
|
135 |
st.sidebar.title("📚 File Manager")
|
136 |
all_files = {'md': [], 'mp3': [], 'wav': []}
|
|
|
189 |
height=0
|
190 |
)
|
191 |
|
192 |
+
def perform_arxiv_search(query):
    """Query the remote ArXiv RAG Space, render the results and save them.

    Two remote calls are made: ``/update_with_rag_md`` for the paper listing
    and ``/ask_llm`` for a generated summary. The combined markdown is shown,
    parsed papers (if any) are voiced and displayed, and the result is passed
    to ``create_file``. ``display_papers`` and ``create_file`` are defined
    elsewhere — not visible in this view.

    Args:
        query: The search string.

    Returns:
        str: the combined markdown result.
    """
    llm_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # First element of the search response is the markdown listing of papers.
    search_response = client.predict(
        query, 20, "Semantic Search",
        llm_name,
        api_name="/update_with_rag_md"
    )
    papers = search_response[0]

    summary = client.predict(
        query,
        llm_name,
        True,
        api_name="/ask_llm"
    )

    result = f"### 🔎 {query}\n\n{summary}\n\n{papers}"
    st.markdown(result)

    papers_data = parse_arxiv_refs(papers)
    if papers_data:
        create_paper_audio(papers_data, query)
        display_papers(papers_data)

    create_file(query, result, "md")
    return result
|
217 |
|
218 |
def parse_arxiv_refs(text, max_papers=20):
    """Parse the markdown paper listing into a list of paper dicts.

    A line containing ``|`` starts a new paper and is read as
    ``date | title | ...``. The first non-blank line after it becomes the
    authors; every further non-blank line is appended to the summary.

    Args:
        text: The listing text; ``None`` or ``""`` yields an empty list.
        max_papers: Maximum number of papers returned (default 20).

    Returns:
        list[dict]: dicts with keys 'date', 'title', 'authors', 'summary', 'id'.
        'id' is the first arXiv-style identifier on the header line, or ''.
    """
    if not text:
        return []

    # arXiv ids have 5 digits after the dot since 2015 and 4 before that;
    # the greedy {4,5} still captures the full 5-digit form.
    id_pattern = re.compile(r'(\d{4}\.\d{4,5})')

    papers = []
    current = None
    for raw_line in text.split('\n'):
        if '|' in raw_line:
            # Header line: flush the previous paper and start a new one.
            if current:
                papers.append(current)
            parts = raw_line.strip('* ').split('|')
            id_match = id_pattern.search(raw_line)
            current = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
            continue

        if current is None:
            # Text before the first header belongs to no paper.
            continue

        content = raw_line.strip()
        if not content:
            # Blank lines would only add stray spaces to the summary.
            continue

        if not current['authors']:
            current['authors'] = content.strip('* ')
        else:
            current['summary'] = f"{current['summary']} {content}".strip()

    if current:
        papers.append(current)
    return papers[:max_papers]
|
|
|
241 |
|
242 |
+
def create_paper_audio(papers, query):
    """Generate and display one audio clip per paper plus a spoken summary.

    Each paper dict gets an 'audio' entry holding the clip filename (``None``
    when nothing could be spoken). Failures for a single paper are reported
    with ``st.warning`` and do not stop the remaining papers.

    Args:
        papers: List of paper dicts with 'title', 'authors' and 'summary'.
        query: The search query, mentioned in the spoken summary.
    """
    if not papers:
        return

    # Same for every paper, so look it up once.
    file_format = st.session_state['audio_format']
    emoji = FILE_EMOJIS.get(file_format, '')

    combined = []
    for paper in papers:
        try:
            text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            audio_file = speak_text(text, file_format=file_format)
            paper['audio'] = audio_file
            if not audio_file:
                # speak_text returns None when there is nothing to speak;
                # skip quietly instead of failing inside os.path.basename.
                continue
            st.write(f"### {emoji} {os.path.basename(audio_file)}")
            st.audio(audio_file)
            combined.append(paper['title'])
        except Exception as e:
            # Deliberately broad: audio is best-effort per paper.
            st.warning(f"Error generating audio for {paper['title']}: {str(e)}")

    if combined:
        summary = f"Found papers about: {'; '.join(combined)}. Query was: {query}"
        summary_audio = speak_text(summary)
        if summary_audio:
            st.write("### 📢 Summary")
            st.audio(summary_audio)
|
262 |
|
263 |
def main():
|
264 |
+
st.sidebar.markdown("### 🚲BikeAI🏆 Research Assistant")
|
265 |
|
266 |
+
# Voice settings
|
267 |
+
st.sidebar.markdown("### 🎤 Voice Config")
|
268 |
+
voice = st.sidebar.selectbox("Voice:", EDGE_TTS_VOICES,
|
269 |
+
index=EDGE_TTS_VOICES.index(st.session_state['tts_voice']))
|
270 |
+
fmt = st.sidebar.radio("Format:", ["MP3", "WAV"], index=0)
|
271 |
|
272 |
+
if voice != st.session_state['tts_voice']:
|
273 |
+
st.session_state['tts_voice'] = voice
|
274 |
+
st.rerun()
|
275 |
+
if fmt.lower() != st.session_state['audio_format']:
|
276 |
+
st.session_state['audio_format'] = fmt.lower()
|
277 |
+
st.rerun()
|
278 |
+
|
279 |
+
mode = st.radio("Mode:", ["🎤 Voice", "🔍 ArXiv", "📝 Editor"], horizontal=True)
|
280 |
|
281 |
+
if mode == "🔍 ArXiv":
|
282 |
query = st.text_input("🔍 Search:")
|
283 |
if query:
|
284 |
+
perform_arxiv_search(query)
|
285 |
+
|
286 |
+
elif mode == "🎤 Voice":
|
287 |
+
text = st.text_area("Message:", height=100).strip()
|
288 |
+
if st.button("Send"):
|
289 |
+
process_with_gpt(text)
|
290 |
+
|
291 |
+
st.subheader("History")
|
292 |
+
tab1, tab2 = st.tabs(["Claude", "GPT-4"])
|
293 |
+
with tab1:
|
294 |
+
for msg in st.session_state.chat_history:
|
295 |
+
st.write("You:", msg["user"])
|
296 |
+
st.write("Claude:", msg["claude"])
|
297 |
+
with tab2:
|
298 |
+
for msg in st.session_state.messages:
|
299 |
+
with st.chat_message(msg["role"]):
|
300 |
+
st.markdown(msg["content"])
|
301 |
|
302 |
+
elif mode == "📝 Editor":
|
303 |
+
if st.session_state.current_file:
|
304 |
+
st.subheader(f"Editing: {st.session_state.current_file}")
|
305 |
+
new_content = st.text_area("Content:", st.session_state.file_content, height=300)
|
306 |
+
if st.button("Save"):
|
307 |
+
with open(st.session_state.current_file, 'w') as f:
|
308 |
+
f.write(new_content)
|
309 |
+
st.success("Saved!")
|
310 |
+
st.session_state.should_rerun = True
|
311 |
+
|
312 |
groups = load_files_for_sidebar()
|
313 |
display_file_manager_sidebar(groups)
|
314 |
|
|
|
316 |
st.session_state.should_rerun = False
|
317 |
st.rerun()
|
318 |
|
|
|
319 |
sidebar_md = """# 📚 Research Papers
|
320 |
+
## AGI Levels
|
|
|
321 |
L0 ❌ No AI
|
322 |
+
L1 🌱 ChatGPT [2303.08774](https://arxiv.org/abs/2303.08774) | [PDF](https://arxiv.org/pdf/2303.08774.pdf)
|
323 |
+
L2 💪 Watson [2201.11903](https://arxiv.org/abs/2201.11903) | [PDF](https://arxiv.org/pdf/2201.11903.pdf)
|
324 |
+
L3 🎯 DALL·E [2204.06125](https://arxiv.org/abs/2204.06125) | [PDF](https://arxiv.org/pdf/2204.06125.pdf)
|
325 |
+
L4 🏆 AlphaGo [1712.01815](https://arxiv.org/abs/1712.01815) | [PDF](https://arxiv.org/pdf/1712.01815.pdf)
|
326 |
+
L5 🚀 AlphaFold [2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
|
327 |
|
328 |
## 🧬 AlphaFold2
|
329 |
+
[2203.15556](https://arxiv.org/abs/2203.15556) | [PDF](https://arxiv.org/pdf/2203.15556.pdf)
|
330 |
+
1. 🧬 Input → 2. 🔍 Search → 3. 🧩 MSA
|
331 |
+
4. 📑 Templates → 5. 🔄 Evoformer → 6. 🧱 Structure
|
332 |
+
7. 🎯 3D Predict → 8. ♻️ Recycle"""
|
333 |
|
334 |
st.sidebar.markdown(sidebar_md)
|
335 |
|