import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from streamlit_marquee import streamlit_marquee
import asyncio
import edge_tts
# ── App configuration and global constants ──────────────────────────────
st.set_page_config(page_title="π²TalkingAIResearcherπ", page_icon="π²π", layout="wide")

# Edge-TTS voices offered in the voice selector.
EDGE_TTS_VOICES = ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-GB-SoniaNeural"]

# Emoji shown next to each file extension in the sidebar file manager.
FILE_EMOJIS = {"md": "π", "mp3": "π΅", "wav": "π", "txt": "π", "pdf": "π"}

# Session state initialization: seed defaults only on the first run so that
# widget interactions on later reruns are not clobbered.
_SESSION_DEFAULTS = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'transcript_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
@st.cache_resource
def get_cached_audio_b64(file_path):
    """Read *file_path* as bytes and return its base64 text (cached per path)."""
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    return base64.b64encode(raw_bytes).decode()
def beautify_filename(filename):
    """Return a display title: drop the extension, turn '_' and '.' into spaces."""
    stem, _ext = os.path.splitext(filename)
    for separator in ('_', '.'):
        stem = stem.replace(separator, ' ')
    return stem
def load_files_for_sidebar():
    """Load and filter files for sidebar by timestamp prefix"""
    prefix_pattern = re.compile(r'\d{4}_\d{4}')
    groups = defaultdict(list)
    for path in glob.glob("*.*"):
        base = os.path.basename(path)
        # Markdown files are kept only when they carry a 9-char yymm_HHMM prefix;
        # every other extension is included unconditionally.
        if path.endswith('.md') and not (len(base) >= 9 and prefix_pattern.match(base[:9])):
            continue
        group_key = base[:9] if len(base) >= 9 else 'Other'
        groups[group_key].append(path)
    # Order groups so the one with the most recently modified member comes first.
    newest_mtime = lambda entry: max(os.path.getmtime(p) for p in entry[1])
    return sorted(groups.items(), key=newest_mtime, reverse=True)
def display_marquee_controls():
    """Render marquee styling widgets in the sidebar and return a CSS dict."""
    st.sidebar.markdown("### π― Marquee Settings")
    left, right = st.sidebar.columns(2)
    with left:
        background = st.color_picker("π¨ Background", "#1E1E1E")
        foreground = st.color_picker("βοΈ Text", "#FFFFFF")
    with right:
        size_px = st.slider("π Size", 10, 24, 14)
        seconds = st.slider("β±οΈ Speed", 1, 20, 10)
    settings = {"background": background, "color": foreground}
    settings["font-size"] = f"{size_px}px"
    settings["animationDuration"] = f"{seconds}s"
    settings["width"] = "100%"
    settings["lineHeight"] = "35px"
    return settings
def get_download_link(file_path, file_type="zip"):
    """Return an HTML anchor that downloads *file_path* via a base64 data URI.

    Args:
        file_path: Path of the file to embed.
        file_type: Extension used to pick the emoji and MIME type.

    Returns:
        An ``<a>`` tag string for st.markdown(..., unsafe_allow_html=True).
    """
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    ext_map = {'zip': 'π¦', 'mp3': 'π΅', 'wav': 'π', 'md': 'π'}
    emoji = ext_map.get(file_type, '')
    mime_map = {'zip': 'application/zip', 'mp3': 'audio/mpeg',
                'wav': 'audio/wav', 'md': 'text/markdown'}
    mime = mime_map.get(file_type, 'application/octet-stream')
    basename = os.path.basename(file_path)
    # Bug fix: previously the encoded payload was computed and then discarded,
    # so callers rendered plain text with no working download. Embed it in a
    # data: URI with the HTML `download` attribute instead.
    return (f'<a href="data:{mime};base64,{b64}" download="{basename}">'
            f'{emoji} Download {basename}</a>')
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given markdown/audio files into a timestamped zip.

    Returns the archive filename, or None when no files were supplied.
    The *query* argument is currently unused but kept for interface stability.
    """
    candidates = [*md_files, *mp3_files, *wav_files]
    if not candidates:
        return None
    stamp = datetime.now().strftime("%y%m_%H%M")
    archive_name = f"{stamp}_archive.zip"
    with zipfile.ZipFile(archive_name, 'w') as archive:
        for path in candidates:
            archive.write(path)
    return archive_name
def display_file_manager_sidebar(groups_sorted):
    """Render the sidebar file manager.

    Shows per-extension bulk-delete buttons, a zip-all download, and one
    expander per timestamp group with view/delete actions plus audio players.

    Args:
        groups_sorted: list of (group_name, files) pairs, newest group first,
            as produced by load_files_for_sidebar().
    """
    st.sidebar.title("π File Manager")
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)
    cols = st.sidebar.columns(4)
    for i, (ext, files) in enumerate(all_files.items()):
        with cols[i]:
            if st.button(f"ποΈ {ext.upper()}"):
                # Fix: use a plain loop for the deletion side effect instead of
                # a throwaway list comprehension.
                for f in files:
                    os.remove(f)
                st.session_state.should_rerun = True
    if st.sidebar.button("π¦ Zip All"):
        zip_name = create_zip_of_files(all_files['md'], all_files['mp3'], all_files['wav'])
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
    for group_name, files in groups_sorted:
        # Render a 'yymm_HHMM' prefix as a readable datetime; fall back to the
        # raw group name for 'Other' or unparseable prefixes.
        try:
            timestamp = datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M") if len(group_name) == 9 and group_name != 'Other' else group_name
        except ValueError:
            timestamp = group_name
        with st.sidebar.expander(f"π {timestamp} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("π", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("ποΈ", key=f"del_{group_name}"):
                    for f in files:
                        os.remove(f)
                    st.session_state.should_rerun = True
            for f in files:
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, 'π')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")
                if ext in ['mp3', 'wav']:
                    st.audio(f)
                    if st.button("π", key=f"loop_{f}"):
                        audio_b64 = get_cached_audio_b64(f)
                        # Bug fix: the HTML payload was an empty f-string, so
                        # the encoded audio was never used and the loop button
                        # did nothing. Inject a hidden looping <audio> element
                        # built from the base64 data.
                        mime = 'audio/mpeg' if ext == 'mp3' else f'audio/{ext}'
                        st.components.v1.html(
                            f'''<audio autoplay loop>
                                <source src="data:{mime};base64,{audio_b64}" type="{mime}">
                            </audio>''',
                            height=0
                        )
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize *text* with Edge TTS; return the saved filename or None.

    Whitespace runs are collapsed to single spaces before synthesis; an
    empty/whitespace-only input produces no file.
    """
    normalized = re.sub(r'\s+', ' ', text).strip()
    if not normalized:
        return None
    out_name = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
    speaker = edge_tts.Communicate(normalized, voice)
    await speaker.save(out_name)
    return out_name
def parse_arxiv_refs(text):
    """Parse the markdown paper list returned by the ArXiv RAG endpoint.

    Lines containing '|' start a new paper ("date | title | ..."); the first
    subsequent non-header line becomes the author string and any further
    lines accumulate into the summary.

    Args:
        text: Multi-line markdown string.

    Returns:
        List of dicts with keys 'date', 'title', 'authors', 'summary', 'id'
        (arXiv id like '2301.12345', or '' when none is found in the line).
    """
    arxiv_id_re = re.compile(r'(\d{4}\.\d{5})')
    papers = []
    current_paper = None
    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                papers.append(current_paper)
            parts = line.strip('* ').split('|')
            # Fix: run the arXiv-id regex once per line instead of twice.
            id_match = arxiv_id_re.search(line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else ''
            }
        elif current_paper:
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                current_paper['summary'] += ' ' + line.strip()
    if current_paper:  # flush the trailing paper
        papers.append(current_paper)
    return papers
def perform_ai_lookup(query, marquee_settings=None):
    """Query the ArXiv RAG Hugging Face space and marquee each paper found.

    Args:
        query: Free-text search string.
        marquee_settings: Optional marquee CSS dict (as produced by
            display_marquee_controls()). When None, the settings widgets are
            created here, preserving the original behavior.
            NOTE(review): main() already calls display_marquee_controls();
            invoking it a second time per run re-creates identical sidebar
            widgets, which Streamlit rejects as duplicates — callers should
            pass the settings in. Confirm against the live app.

    Returns:
        List of paper dicts from parse_arxiv_refs().
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )
    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        marquee_settings = display_marquee_controls()
    for paper in papers:
        content = f"π {paper['title']} | π€ {paper['authors']} | π {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            key=f"paper_{paper['id'] or random.randint(1000,9999)}"
        )
        st.write("")
    return papers
def main():
    """App entry point: marquee header, tab routing, and sidebar file manager."""
    marquee_settings = display_marquee_controls()
    streamlit_marquee(
        content="π Welcome to TalkingAIResearcher | π€ Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )
    tab = st.radio("Action:", ["π€ Voice", "π ArXiv", "π Editor"], horizontal=True)
    if tab == "π ArXiv":
        query = st.text_input("π Search:")
        if query:
            # NOTE(review): perform_ai_lookup() calls display_marquee_controls()
            # again, re-creating the same sidebar widgets built above —
            # Streamlit raises on duplicate widgets; confirm in the live app.
            papers = perform_ai_lookup(query)
            st.write(f"Found {len(papers)} papers")
    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)
    # Deferred rerun: the flag is set by delete buttons inside the sidebar
    # manager so the file list refreshes after removals.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
    # Condensed sidebar markdown
    sidebar_md = """# π Research Papers
## π§ AGI Levels
L0 β No AI
L1 π± ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 πͺ Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 π― DALLΒ·E [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 π AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 π AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## 𧬠AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. 𧬠Input Seq β 2. π DB Search β 3. 𧩠MSA
4. π Templates β 5. π Evoformer β 6. 𧱠Structure
7. π― 3D Predict β 8. β»οΈ Recycle x3"""
    st.sidebar.markdown(sidebar_md)
if __name__ == "__main__":
    main()