# app.py — TalkingAIResearcher (Hugging Face Space by awacke1, commit ac9fced)
import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from collections import defaultdict, Counter
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from PIL import Image
from openai import OpenAI
import asyncio
import edge_tts
from streamlit_marquee import streamlit_marquee
# Streamlit page configuration — must be the first st.* call in the script.
st.set_page_config(
    page_title="๐ŸšฒTalkingAIResearcher๐Ÿ†",
    page_icon="๐Ÿšฒ๐Ÿ†",
    layout="wide"
)

# Voice names accepted by edge-tts; the first entry is the session default.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural"
]

# Emoji shown by the sidebar file manager, keyed by lowercase file extension.
FILE_EMOJIS = {
    "md": "๐Ÿ“",
    "mp3": "๐ŸŽต",
    "wav": "๐Ÿ”Š",
    "txt": "๐Ÿ“„",
    "pdf": "๐Ÿ“‘"
}
# Seed st.session_state with per-session defaults, only for keys not yet set.
_SESSION_DEFAULTS = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# API clients, configured from environment variables.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
@st.cache_resource
def get_cached_audio_b64(file_path):
    """Return the base64-encoded contents of *file_path*, cached across reruns."""
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    return base64.b64encode(raw_bytes).decode()
def beautify_filename(filename):
    """Human-friendly display name: drop the extension, map '_' and '.' to spaces."""
    stem, _ext = os.path.splitext(filename)
    return stem.translate(str.maketrans('_.', '  '))
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip archive.

    Args:
        md_files, mp3_files, wav_files: lists of file paths to include.
        query: unused; retained for backward compatibility with existing callers.

    Returns:
        The archive filename, or None when there is nothing to zip.
    """
    # Deduplicate while preserving order so the archive never contains
    # two entries with the same name (zipfile warns on duplicate writes).
    all_files = list(dict.fromkeys(md_files + mp3_files + wav_files))
    if not all_files:
        return None
    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for f in all_files:
            z.write(f)
    return zip_name
def get_download_link(file_path, file_type="zip"):
    """Build an inline <a download> link whose href embeds the file as base64."""
    with open(file_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    icon_by_type = {'zip': '๐Ÿ“ฆ', 'mp3': '๐ŸŽต', 'wav': '๐Ÿ”Š', 'md': '๐Ÿ“'}
    icon = icon_by_type.get(file_type, '')
    basename = os.path.basename(file_path)
    return (
        f'<a href="data:application/{file_type};base64,{encoded}" '
        f'download="{basename}">{icon} Download {basename}</a>'
    )
def load_files_for_sidebar():
    """Group working-directory files by their 9-character name prefix.

    README files are excluded; files with names shorter than 9 characters
    fall into an 'Other' bucket. Groups are returned newest-first, ordered
    by the most recent mtime within each group.
    """
    candidates = (p for p in glob.glob("*.*") if not p.lower().endswith('readme.md'))
    buckets = defaultdict(list)
    for path in candidates:
        base = os.path.basename(path)
        prefix = base[:9] if len(base) >= 9 else 'Other'
        buckets[prefix].append(path)
    return sorted(
        buckets.items(),
        key=lambda item: max(os.path.getmtime(p) for p in item[1]),
        reverse=True,
    )
def display_marquee_controls(key_prefix=""):
    """Render marquee styling controls in the sidebar and return a style dict.

    Args:
        key_prefix: optional widget-key prefix. Pass a distinct prefix when
            this function is called more than once per script run — otherwise
            Streamlit raises a duplicate-widget-ID error because identical
            widgets collide (both main() and perform_ai_lookup() call this).

    Returns:
        dict of CSS-like settings consumed by streamlit_marquee().
    """
    st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
    cols = st.sidebar.columns(2)
    with cols[0]:
        bg_color = st.color_picker("๐ŸŽจ Background", "#1E1E1E", key=f"{key_prefix}marquee_bg")
        text_color = st.color_picker("โœ๏ธ Text", "#FFFFFF", key=f"{key_prefix}marquee_text")
    with cols[1]:
        font_size = st.slider("๐Ÿ“ Size", 10, 24, 14, key=f"{key_prefix}marquee_size")
        duration = st.slider("โฑ๏ธ Speed", 1, 20, 10, key=f"{key_prefix}marquee_speed")
    return {
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s",
        "width": "100%",
        "lineHeight": "35px"
    }
def display_file_manager_sidebar(groups_sorted):
    """Render the sidebar file manager.

    Shows per-extension bulk-delete buttons, a zip-all download, and one
    expander per timestamp group with view/delete controls plus an audio
    player (and a hidden looping player) for mp3/wav files.

    Args:
        groups_sorted: list of (group_name, files) pairs as produced by
            load_files_for_sidebar(), newest group first.
    """
    st.sidebar.title("๐Ÿ“š File Manager")
    # Bucket files by extension; only md/mp3/wav participate in bulk actions.
    all_files = {'md': [], 'mp3': [], 'wav': []}
    for _, files in groups_sorted:
        for f in files:
            ext = os.path.splitext(f)[1].lower().strip('.')
            if ext in all_files:
                all_files[ext].append(f)
    # One delete-all button per tracked extension.
    cols = st.sidebar.columns(4)
    for i, (ext, files) in enumerate(all_files.items()):
        with cols[i]:
            if st.button(f"๐Ÿ—‘๏ธ {ext.upper()}"):
                [os.remove(f) for f in files]
                # Deletion takes effect visually on the next script run.
                st.session_state.should_rerun = True
    if st.sidebar.button("๐Ÿ“ฆ Zip All"):
        zip_name = create_zip_of_files(
            all_files['md'], all_files['mp3'], all_files['wav']
        )
        if zip_name:
            st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
    for group_name, files in groups_sorted:
        # 9-char group names are assumed to be %y%m_%H%M timestamps.
        # NOTE(review): strptime raises ValueError for any other 9-char
        # name (e.g. a file not created by this app) — confirm inputs.
        timestamp = (datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
                     if len(group_name) == 9 else group_name)
        with st.sidebar.expander(f"๐Ÿ“ {timestamp} ({len(files)})", expanded=True):
            c1, c2 = st.columns(2)
            with c1:
                if st.button("๐Ÿ‘€", key=f"view_{group_name}"):
                    st.session_state.viewing_prefix = group_name
            with c2:
                if st.button("๐Ÿ—‘๏ธ", key=f"del_{group_name}"):
                    [os.remove(f) for f in files]
                    st.session_state.should_rerun = True
            for f in files:
                ext = os.path.splitext(f)[1].lower().strip('.')
                emoji = FILE_EMOJIS.get(ext, '๐Ÿ“„')
                pretty_name = beautify_filename(os.path.basename(f))
                st.write(f"{emoji} **{pretty_name}**")
                if ext in ['mp3', 'wav']:
                    st.audio(f)
                    # Hidden looping <audio> element with an autoplay script,
                    # injected as raw HTML (height=0 keeps it invisible).
                    if st.button("๐Ÿ”„", key=f"loop_{f}"):
                        audio_b64 = get_cached_audio_b64(f)
                        st.components.v1.html(
                            f'''
                            <audio id="player_{f}" loop>
                                <source src="data:audio/{ext};base64,{audio_b64}">
                            </audio>
                            <script>
                                document.getElementById("player_{f}").play();
                            </script>
                            ''',
                            height=0
                        )
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize *text* with edge-tts into a timestamped audio file.

    Returns the generated filename, or None when the text is blank after
    whitespace normalization.
    """
    normalized = re.sub(r'\s+', ' ', text).strip()
    if not normalized:
        return None
    out_name = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
    tts = edge_tts.Communicate(normalized, voice)
    await tts.save(out_name)
    return out_name
def parse_arxiv_refs(text):
    """Parse the markdown-ish paper list returned by the arXiv RAG backend.

    Each paper starts on a line containing 'date | title'; the next
    non-header line is taken as the authors and any further lines are
    appended to the summary, until the next header line.

    Args:
        text: raw multi-line response text.

    Returns:
        list of dicts with keys 'date', 'title', 'authors', 'summary', 'id'
        ('id' is '' when no arXiv identifier appears on the header line).
    """
    # New-style arXiv IDs are YYMM.NNNN (pre-2015) or YYMM.NNNNN (2015+);
    # the original pattern only matched the 5-digit form.
    id_pattern = re.compile(r'(\d{4}\.\d{4,5})')
    papers = []
    current_paper = None

    def _finalize(paper):
        # Trim the leading space accumulated while concatenating summary lines.
        paper['summary'] = paper['summary'].strip()
        papers.append(paper)

    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                _finalize(current_paper)
            parts = line.strip('* ').split('|')
            id_match = id_pattern.search(line)  # search once, not twice
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else ''
            }
        elif current_paper:
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                current_paper['summary'] += ' ' + line.strip()
    if current_paper:
        _finalize(current_paper)
    return papers
def perform_ai_lookup(query, marquee_settings=None):
    """Query the arXiv RAG Space, parse the papers, and marquee each one.

    Args:
        query: free-text search string sent to the remote Gradio Space.
        marquee_settings: optional style dict from display_marquee_controls().
            Pass the settings already built by the caller to avoid rendering
            the sidebar controls a second time in one run (main() also
            renders them, and duplicate identical widgets make Streamlit
            raise a duplicate-widget-ID error).

    Returns:
        list of parsed paper dicts (see parse_arxiv_refs).
    """
    # Remote network call to a Hugging Face Space; may be slow or fail offline.
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )
    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        marquee_settings = display_marquee_controls()
    for paper in papers:
        content = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors']} | ๐Ÿ“ {paper['summary']}"
        # NOTE(review): papers without a parsed arXiv id fall back to a random
        # widget key, which changes every rerun — consider a stable fallback.
        streamlit_marquee(
            content=content,
            **marquee_settings,
            key=f"paper_{paper['id'] or random.randint(1000,9999)}"
        )
        st.write("")  # vertical spacing between marquees
    return papers
def main():
    """Top-level Streamlit page: welcome marquee, action tabs, sidebar file manager."""
    marquee_settings = display_marquee_controls()
    streamlit_marquee(
        content="๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )

    # Action selector; only the ArXiv tab currently has behavior wired up.
    tab = st.radio("Action:", ["๐ŸŽค Voice", "๐Ÿ” ArXiv", "๐Ÿ“ Editor"], horizontal=True)
    if tab == "๐Ÿ” ArXiv":
        query = st.text_input("๐Ÿ” Search:")
        if query:
            papers = perform_ai_lookup(query)
            st.write(f"Found {len(papers)} papers")

    # Sidebar file manager, rebuilt from disk on every run.
    display_file_manager_sidebar(load_files_for_sidebar())

    # Deferred rerun requested by the sidebar delete buttons.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()

    # Condensed sidebar markdown
    sidebar_md = """# ๐Ÿ“š Research Papers
## ๐Ÿง  AGI Levels
L0 โŒ No AI
L1 ๐ŸŒฑ ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 ๐Ÿ’ช Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 ๐ŸŽฏ DALLยทE [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 ๐Ÿ† AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 ๐Ÿš€ AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
## ๐Ÿงฌ AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. ๐Ÿงฌ Input Seq โ†’ 2. ๐Ÿ” DB Search โ†’ 3. ๐Ÿงฉ MSA
4. ๐Ÿ“‘ Templates โ†’ 5. ๐Ÿ”„ Evoformer โ†’ 6. ๐Ÿงฑ Structure
7. ๐ŸŽฏ 3D Predict โ†’ 8. โ™ป๏ธ Recycle x3"""
    st.sidebar.markdown(sidebar_md)


if __name__ == "__main__":
    main()