import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict, Counter
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from openai import OpenAI
import asyncio
import edge_tts
from streamlit_marquee import streamlit_marquee
st.set_page_config(
    page_title="🚲TalkingAIResearcher🏆",
    page_icon="🚲🏆",
    layout="wide"
)
EDGE_TTS_VOICES = [
"en-US-AriaNeural",
"en-US-GuyNeural",
"en-US-JennyNeural",
"en-GB-SoniaNeural"
]
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
    "wav": "🔊",
    "txt": "📄",
    "pdf": "📑"
}
# Initialize session states
if 'tts_voice' not in st.session_state:
st.session_state['tts_voice'] = EDGE_TTS_VOICES[0]
if 'audio_format' not in st.session_state:
st.session_state['audio_format'] = 'mp3'
if 'messages' not in st.session_state:
st.session_state['messages'] = []
if 'chat_history' not in st.session_state:
st.session_state['chat_history'] = []
if 'viewing_prefix' not in st.session_state:
st.session_state['viewing_prefix'] = None
if 'should_rerun' not in st.session_state:
st.session_state['should_rerun'] = False
# API Setup
load_dotenv()  # pull API keys from a local .env file if present
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
@st.cache_resource
def get_cached_audio_b64(file_path):
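    """Return the base64-encoded bytes of a file, cached across Streamlit reruns."""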
with open(file_path, "rb") as f:
return base64.b64encode(f.read()).decode()
def beautify_filename(filename):
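    """Turn a filename into a readable label: drop the extension and replace '_' / '.' with spaces."""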
name = os.path.splitext(filename)[0]
return name.replace('_', ' ').replace('.', ' ')
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
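    """Zip the given markdown and audio files into a timestamped archive; return its name, or None if empty."""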
all_files = md_files + mp3_files + wav_files
if not all_files: return None
timestamp = datetime.now().strftime("%y%m_%H%M")
zip_name = f"{timestamp}_archive.zip"
with zipfile.ZipFile(zip_name, 'w') as z:
for f in all_files:
z.write(f)
return zip_name
def get_download_link(file_path, file_type="zip"):
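    """Build an HTML download link (base64 data URI) for use with st.markdown(unsafe_allow_html=True)."""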
with open(file_path, "rb") as f:
b64 = base64.b64encode(f.read()).decode()
    ext_map = {'zip': '📦', 'mp3': '🎵', 'wav': '🔊', 'md': '📝'}
    mime_map = {'zip': 'application/zip', 'mp3': 'audio/mpeg', 'wav': 'audio/wav', 'md': 'text/markdown'}
    filename = os.path.basename(file_path)
    return (f'<a href="data:{mime_map.get(file_type, "application/octet-stream")};base64,{b64}" '
            f'download="{filename}">{ext_map.get(file_type, "")} Download {filename}</a>')
def load_files_for_sidebar():
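    """Group files in the working directory by their 9-character timestamp prefix, newest group first."""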
files = [f for f in glob.glob("*.*") if not f.lower().endswith('readme.md')]
groups = defaultdict(list)
for f in files:
basename = os.path.basename(f)
group_name = basename[:9] if len(basename) >= 9 else 'Other'
groups[group_name].append(f)
return sorted(groups.items(),
key=lambda x: max(os.path.getmtime(f) for f in x[1]),
reverse=True)
def display_marquee_controls():
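    """Render marquee styling controls in the sidebar and return them as a streamlit_marquee settings dict."""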
    st.sidebar.markdown("### 🎯 Marquee Settings")
cols = st.sidebar.columns(2)
with cols[0]:
        bg_color = st.color_picker("🎨 Background", "#1E1E1E")
        text_color = st.color_picker("✏️ Text", "#FFFFFF")
with cols[1]:
        font_size = st.slider("📏 Size", 10, 24, 14)
        duration = st.slider("⏱️ Speed", 1, 20, 10)
return {
"background": bg_color,
"color": text_color,
"font-size": f"{font_size}px",
"animationDuration": f"{duration}s",
"width": "100%",
"lineHeight": "35px"
}
def display_file_manager_sidebar(groups_sorted):
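    """Render the sidebar file manager: bulk delete/zip buttons plus per-group view, delete, and playback."""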
    st.sidebar.title("📁 File Manager")
all_files = {'md': [], 'mp3': [], 'wav': []}
for _, files in groups_sorted:
for f in files:
ext = os.path.splitext(f)[1].lower().strip('.')
if ext in all_files:
all_files[ext].append(f)
cols = st.sidebar.columns(4)
for i, (ext, files) in enumerate(all_files.items()):
with cols[i]:
if st.button(f"ποΈ {ext.upper()}"):
[os.remove(f) for f in files]
st.session_state.should_rerun = True
    if st.sidebar.button("📦 Zip All"):
zip_name = create_zip_of_files(
all_files['md'], all_files['mp3'], all_files['wav']
)
if zip_name:
st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
for group_name, files in groups_sorted:
        try:
            timestamp = (datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
                         if len(group_name) == 9 else group_name)
        except ValueError:
            # 9-character prefix that is not a timestamp (e.g. an unrelated file); show it as-is
            timestamp = group_name
        with st.sidebar.expander(f"📅 {timestamp} ({len(files)})", expanded=True):
c1, c2 = st.columns(2)
with c1:
if st.button("π", key=f"view_{group_name}"):
st.session_state.viewing_prefix = group_name
with c2:
if st.button("ποΈ", key=f"del_{group_name}"):
[os.remove(f) for f in files]
st.session_state.should_rerun = True
            for f in files:
                if not os.path.exists(f):
                    continue  # skip files removed by the delete buttons above in this run
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '📄')
pretty_name = beautify_filename(os.path.basename(f))
st.write(f"{emoji} **{pretty_name}**")
if ext in ['mp3', 'wav']:
st.audio(f)
if st.button("π", key=f"loop_{f}"):
audio_b64 = get_cached_audio_b64(f)
st.components.v1.html(
f'''
''',
height=0
)
async def edge_tts_generate(text, voice, file_format="mp3"):
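    """Synthesize text with Edge TTS and save it to a timestamped audio file; return the filename."""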
text = re.sub(r'\s+', ' ', text).strip()
if not text: return None
communicate = edge_tts.Communicate(text, voice)
filename = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
await communicate.save(filename)
return filename
def parse_arxiv_refs(text):
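    """Parse pipe-delimited paper listings: each 'date | title' line starts a new paper,
    the following line is taken as the authors, and later lines extend the summary."""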
papers = []
current_paper = None
for line in text.split('\n'):
if '|' in line:
if current_paper:
papers.append(current_paper)
            parts = line.strip('* ').split('|')
            if len(parts) < 2:
                current_paper = None
                continue
            id_match = re.search(r'(\d{4}\.\d{4,5})', line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else ''
            }
elif current_paper:
if not current_paper['authors']:
current_paper['authors'] = line.strip('* ')
else:
current_paper['summary'] += ' ' + line.strip()
if current_paper:
papers.append(current_paper)
return papers
def perform_ai_lookup(query, marquee_settings):
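    """Query the arXiv RAG Space via gradio_client and render each result as a scrolling marquee."""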
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
response = client.predict(
query, 20, "Semantic Search",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
api_name="/update_with_rag_md"
)
papers = parse_arxiv_refs(response[0])
    for i, paper in enumerate(papers):
        content = f"📄 {paper['title']} | 👤 {paper['authors']} | 📝 {paper['summary']}"
streamlit_marquee(
content=content,
**marquee_settings,
key=f"paper_{paper['id'] or random.randint(1000,9999)}"
)
st.write("") # Spacing
return papers
def main():
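    """App entry point: welcome marquee, action tabs, ArXiv search, and the sidebar file manager."""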
marquee_settings = display_marquee_controls()
streamlit_marquee(
content="π Welcome to TalkingAIResearcher | π€ Your Research Assistant",
**marquee_settings,
key="welcome"
)
    tab = st.radio("Action:", ["🎤 Voice", "📚 ArXiv", "📝 Editor"], horizontal=True)
    if tab == "📚 ArXiv":
        query = st.text_input("🔍 Search:")
if query:
            papers = perform_ai_lookup(query, marquee_settings)
st.write(f"Found {len(papers)} papers")
groups = load_files_for_sidebar()
display_file_manager_sidebar(groups)
if st.session_state.should_rerun:
st.session_state.should_rerun = False
st.rerun()
# Condensed sidebar markdown
sidebar_md = """# π Research Papers
## π§ AGI Levels
L0 β No AI
L1 π± ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 πͺ Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 π― DALLΒ·E [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 π AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 π AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## 𧬠AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. 𧬠Input Seq β 2. π DB Search β 3. 𧩠MSA
4. π Templates β 5. π Evoformer β 6. 𧱠Structure
7. π― 3D Predict β 8. β»οΈ Recycle x3"""
st.sidebar.markdown(sidebar_md)
if __name__ == "__main__":
main()