# Hugging Face Space status: Running on CPU Upgrade
import asyncio
import base64
import glob
import html
import json
import math
import os
import random
import re
import time
import zipfile
from collections import Counter, defaultdict
from datetime import datetime

import anthropic
import cv2
import edge_tts
import openai
import pytz
import requests
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from openai import OpenAI
from PIL import Image
from streamlit_marquee import streamlit_marquee
# Page-level Streamlit configuration (browser tab title, icon, wide layout).
st.set_page_config(
    page_title="๐ฒTalkingAIResearcher๐",
    page_icon="๐ฒ๐",
    layout="wide",
)
# Voices offered for Edge TTS; the first entry is used as the default
# `tts_voice` when session state is initialised.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
]

# Emoji shown next to a file in the sidebar, keyed by lowercase extension
# (without the leading dot).
FILE_EMOJIS = {
    "md": "๐",
    "mp3": "๐ต",
    "wav": "๐",
    "txt": "๐",
    "pdf": "๐",
}
# Initialize session states: each key is seeded only when it is not already
# present, so values set during earlier runs of the script are kept.
_SESSION_DEFAULTS = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# API Setup: both clients read their key from the process environment.
# NOTE(review): `load_dotenv` is imported but not called in the visible
# source, so a local .env file is not loaded here - confirm that is intended.
openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
def get_cached_audio_b64(file_path):
    """Return the bytes of ``file_path`` encoded as a base64 text string.

    Despite the name, this function does no caching of its own: the file is
    opened and read on every call.
    """
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def beautify_filename(filename):
    """Return a display label for ``filename``.

    The final extension is dropped, then every underscore and every
    remaining dot is replaced by a space.
    """
    label, _extension = os.path.splitext(filename)
    for separator in ('_', '.'):
        label = label.replace(separator, ' ')
    return label
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files: paths of markdown files to include.
        mp3_files: paths of mp3 files to include.
        wav_files: paths of wav files to include.
        query: accepted for backward compatibility; it is not used.

    Returns:
        The archive name (``<yymm_HHMM>_archive.zip``), or ``None`` when
        there is nothing to archive.
    """
    # De-duplicate while keeping order, and skip paths that no longer exist,
    # so a stale listing cannot abort the archive or add an entry twice.
    requested = [*md_files, *mp3_files, *wav_files]
    all_files = [path for path in dict.fromkeys(requested) if os.path.isfile(path)]
    if not all_files:
        return None

    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    # ZIP_DEFLATED compresses the entries; the default (ZIP_STORED) does not.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for path in all_files:
            archive.write(path)
    return zip_name
def get_download_link(file_path, file_type="zip"):
    """Build an HTML ``<a>`` tag that downloads ``file_path`` via a data URI.

    Args:
        file_path: path of the file to embed; it is read fully into memory
            and base64-encoded into the link.
        file_type: short type key ('zip', 'mp3', 'wav', 'md', ...) used to
            pick the MIME type and the emoji prefix.

    Returns:
        The anchor element as a string.
    """
    mime_types = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    ext_map = {'zip': '๐ฆ', 'mp3': '๐ต', 'wav': '๐', 'md': '๐'}

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    emoji = ext_map.get(file_type, '')
    # Types without a known mapping keep the previous "application/<type>" form.
    mime = mime_types.get(file_type) or html.escape(f"application/{file_type}", quote=True)
    # The file name lands in an attribute and in text: escape it for both.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    return (
        f'<a href="data:{mime};base64,{b64}" download="{safe_name}">'
        f'{emoji} Download {safe_name}</a>'
    )
def load_files_for_sidebar():
    """Group files in the working directory by the first 9 characters of their name.

    Every ``*.*`` match is considered except names ending in ``readme.md``
    (case-insensitive). Names shorter than 9 characters go into the
    ``'Other'`` group.

    Returns:
        A list of ``(group_name, files)`` pairs, ordered so the group holding
        the most recently modified file comes first.
    """
    grouped = defaultdict(list)
    for path in glob.glob("*.*"):
        if path.lower().endswith('readme.md'):
            continue
        base = os.path.basename(path)
        prefix = 'Other' if len(base) < 9 else base[:9]
        grouped[prefix].append(path)

    def newest_mtime(entry):
        _prefix, members = entry
        return max(os.path.getmtime(member) for member in members)

    return sorted(grouped.items(), key=newest_mtime, reverse=True)
def display_marquee_controls():
    """Draw the marquee styling widgets in the sidebar and return the chosen style.

    Two colour pickers go in the first sidebar column and two sliders in the
    second.

    Returns:
        A dict of style settings (background, text colour, font size,
        animation duration, width, line height) that callers unpack into
        ``streamlit_marquee``.
    """
    st.sidebar.markdown("### ๐ฏ Marquee Settings")
    color_col, size_col = st.sidebar.columns(2)

    with color_col:
        bg_color = st.color_picker("๐จ Background", "#1E1E1E")
        text_color = st.color_picker("โ๏ธ Text", "#FFFFFF")

    with size_col:
        font_size = st.slider("๐ Size", 10, 24, 14)
        duration = st.slider("โฑ๏ธ Speed", 1, 20, 10)

    settings = {"background": bg_color, "color": text_color}
    settings["font-size"] = f"{font_size}px"
    settings["animationDuration"] = f"{duration}s"
    settings["width"] = "100%"
    settings["lineHeight"] = "35px"
    return settings
def display_file_manager_sidebar(groups_sorted):
    """Render the sidebar file manager.

    The sidebar gets, in order: one bulk-delete button per managed extension
    (md / mp3 / wav), a "Zip All" button that offers a download link, and one
    expander per file group with view / delete buttons and a listing of the
    group's files (audio files also get a player and a loop button).

    Deleting does not rerun the script here; it sets
    ``st.session_state.should_rerun`` so the caller can trigger the rerun.

    Args:
        groups_sorted: sequence of ``(group_name, files)`` pairs; it is
            iterated more than once.
    """

    def remove_files(paths):
        # Best-effort delete: a file that is already gone is not an error.
        for path in paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                continue

    st.sidebar.title("๐ File Manager")

    # Collect the managed file types across all groups.
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)

    cols = st.sidebar.columns(4)
    for col, (ext, files) in zip(cols, all_files.items()):
        with col:
            if st.button(f"๐๏ธ {ext.upper()}"):
                remove_files(files)
                st.session_state.should_rerun = True

    if st.sidebar.button("๐ฆ Zip All"):
        zip_name = create_zip_of_files(
            all_files['md'], all_files['mp3'], all_files['wav']
        )
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)

    for group_name, files in groups_sorted:
        # Group names are the first 9 characters of a file name. Only some of
        # them are "%y%m_%H%M" timestamps (e.g. "requireme" from
        # requirements.txt is not), so fall back to the raw name.
        timestamp = group_name
        if len(group_name) == 9:
            try:
                timestamp = datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
            except ValueError:
                timestamp = group_name

        with st.sidebar.expander(f"๐ {timestamp} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("๐", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("๐๏ธ", key=f"del_{group_name}"):
                    remove_files(files)
                    st.session_state.should_rerun = True

            for f in files:
                # A delete button earlier in this same run may have removed
                # the file; rendering st.audio for it would fail.
                if not os.path.exists(f):
                    continue
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '๐')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")
                if ext in ['mp3', 'wav']:
                    st.audio(f)
                    if st.button("๐", key=f"loop_{f}"):
                        audio_b64 = get_cached_audio_b64(f)
                        # The id is used in an HTML attribute and a JS string
                        # literal; keep it to word characters only.
                        player_id = "player_" + re.sub(r'\W', '_', f)
                        st.components.v1.html(
                            f'''
                            <audio id="{player_id}" loop>
                            <source src="data:audio/{ext};base64,{audio_b64}">
                            </audio>
                            <script>
                            document.getElementById("{player_id}").play();
                            </script>
                            ''',
                            height=0
                        )
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize ``text`` with ``edge_tts`` and save it to a timestamped file.

    Runs of whitespace in ``text`` are collapsed to single spaces first.

    Args:
        text: the text to speak.
        voice: voice name passed to ``edge_tts.Communicate``; it also becomes
            part of the file name.
        file_format: extension used for the output file name.

    Returns:
        The file name (``<yymm_HHMM>_<voice>.<file_format>``), or ``None``
        when ``text`` is empty after whitespace normalisation.
    """
    normalized = re.sub(r'\s+', ' ', text).strip()
    if normalized == '':
        return None

    communicate = edge_tts.Communicate(normalized, voice)
    stamp = datetime.now().strftime('%y%m_%H%M')
    filename = f"{stamp}_{voice}.{file_format}"
    await communicate.save(filename)
    return filename
def parse_arxiv_refs(text):
    """Parse a markdown-style listing of arXiv papers into a list of dicts.

    A line containing ``|`` starts a new paper: the first ``|``-separated
    field is the date and the second is the title. The first non-empty line
    after it is the authors; every later non-empty line is appended to the
    summary. Lines before the first header line are ignored.

    Args:
        text: the listing to parse; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts with keys ``date``, ``title``, ``authors``,
        ``summary`` and ``id``. ``id`` is the first arXiv identifier found on
        the header line, or ``''`` when there is none.
    """
    if not text:
        return []

    # New-style arXiv ids are YYMM.NNNNN; ids issued before 2015 have only
    # four digits after the dot (YYMM.NNNN).
    arxiv_id_pattern = re.compile(r'(\d{4}\.\d{4,5})')

    papers = []
    current_paper = None
    for line in text.split('\n'):
        if '|' in line:
            parts = line.strip('* ').split('|')
            id_match = arxiv_id_pattern.search(line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
            # The dict is filled in place by the following lines.
            papers.append(current_paper)
            continue

        if current_paper is None:
            continue

        content = line.strip()
        if not content:
            # Blank lines carry no information; skipping them keeps the
            # summary free of padding.
            continue

        if not current_paper['authors']:
            current_paper['authors'] = line.strip('* ')
        elif current_paper['summary']:
            current_paper['summary'] += ' ' + content
        else:
            current_paper['summary'] = content
    return papers
def perform_ai_lookup(query, marquee_settings=None):
    """Query the arXiv RAG Space and show one marquee per paper found.

    Args:
        query: the search text sent to the remote Space.
        marquee_settings: style dict to unpack into ``streamlit_marquee``.
            When omitted, the settings are read by drawing the marquee
            controls. Callers that have already drawn the controls in this
            script run should pass their settings in, because drawing the
            same widgets twice in one run makes Streamlit raise a
            duplicate-widget error.

    Returns:
        The list of paper dicts produced by ``parse_arxiv_refs``.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )
    papers = parse_arxiv_refs(response[0])

    if marquee_settings is None:
        marquee_settings = display_marquee_controls()

    for index, paper in enumerate(papers):
        content = f"๐ {paper['title']} | ๐ค {paper['authors']} | ๐ {paper['summary']}"
        # The index keeps keys unique when an id is missing or repeated.
        streamlit_marquee(
            content=content,
            **marquee_settings,
            key=f"paper_{index}_{paper['id']}"
        )
        st.write("")  # Spacing
    return papers


def main():
    """Render the page: welcome marquee, action selector, and file sidebar.

    The marquee controls are drawn once here and the resulting settings are
    shared with the ArXiv lookup. If the file manager deleted anything, the
    script is rerun so the listing reflects the change.
    """
    marquee_settings = display_marquee_controls()
    streamlit_marquee(
        content="๐ Welcome to TalkingAIResearcher | ๐ค Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )

    tab = st.radio("Action:", ["๐ค Voice", "๐ ArXiv", "๐ Editor"], horizontal=True)
    if tab == "๐ ArXiv":
        query = st.text_input("๐ Search:")
        if query:
            papers = perform_ai_lookup(query, marquee_settings)
            st.write(f"Found {len(papers)} papers")

    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)

    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
# Condensed sidebar markdown: a static reference list rendered in the sidebar.
sidebar_md = """# ๐ Research Papers
## ๐ง AGI Levels
L0 โ No AI
L1 ๐ฑ ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 ๐ช Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 ๐ฏ DALLยทE [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 ๐ AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 ๐ AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## ๐งฌ AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. ๐งฌ Input Seq โ 2. ๐ DB Search โ 3. ๐งฉ MSA
4. ๐ Templates โ 5. ๐ Evoformer โ 6. ๐งฑ Structure
7. ๐ฏ 3D Predict โ 8. โป๏ธ Recycle x3"""
st.sidebar.markdown(sidebar_md)

# Script entry point.
if __name__ == "__main__":
    main()