DeepResearchEvaluator

Running

App Files Files Community

DeepResearchEvaluator / app.py

awacke1

Update app.py

f19bbef verified 6 months ago

raw

history blame

25.9 kB

	#!/usr/bin/env python3

	import os
	import re
	import glob
	import json
	import base64
	import zipfile
	import random
	import requests
	import openai
	from PIL import Image
	from urllib.parse import quote

	import streamlit as st
	import streamlit.components.v1 as components

	# For demonstration, from huggingface_hub
	from huggingface_hub import InferenceClient

	# -----------------------------------------------------
	# Ensure default MarkdownCode.md & MermaidCode.md exist
	# -----------------------------------------------------
	# Seed the two default documents on first launch, then restart the script
	# run so the rest of the app sees both files on disk.
	# st.rerun() is used instead of st.experimental_rerun(): the experimental
	# alias is deprecated in favour of st.rerun(), and the rest of this file
	# already calls st.rerun().
	if not os.path.exists("MarkdownCode.md"):
	    with open("MarkdownCode.md", 'w', encoding='utf-8') as f:
	        f.write("# Default Markdown\nThis is a default Markdown file.")
	    st.rerun()

	if not os.path.exists("MermaidCode.md"):
	    with open("MermaidCode.md", 'w', encoding='utf-8') as f:
	        # IMPORTANT: each click line carries a 2nd quoted string (tooltip)
	        # between the URL and the link target.
	        f.write("""flowchart LR
	%% Minimal example with correct 'click' syntax
	%% - "Tooltip text" between the URL and the target
	A[Default] --> B[Example]
	click A "/?q=Default" "Open Default" "_self"
	click B "/?q=Example" "Open Example" "_self"
	""")
	    st.rerun()

	# ----------------------------
	# Placeholder data structures
	# ----------------------------
	# Prefixes appended to glossary search links to select a prompt flavour.
	PromptPrefix = "AI-Search: "
	PromptPrefix2 = "AI-Refine: "
	PromptPrefix3 = "AI-JS: "

	# category -> game -> list of terms
	roleplaying_glossary = {
	    "Core Rulebooks": {
	        "Dungeons and Dragons": [
	            "Player's Handbook",
	            "Dungeon Master's Guide",
	            "Monster Manual",
	        ],
	        "GURPS": [
	            "Basic Set Characters",
	            "Basic Set Campaigns",
	        ],
	    },
	    "Campaigns & Adventures": {
	        "Pathfinder": [
	            "Rise of the Runelords",
	            "Curse of the Crimson Throne",
	        ],
	    },
	}

	# category -> list of terms
	transhuman_glossary = {
	    "Neural Interfaces": [
	        "Cortex Jack",
	        "Mind-Machine Fusion",
	    ],
	    "Cybernetics": [
	        "Robotic Limbs",
	        "Augmented Eyes",
	    ],
	}

	# ------------
	# Stub Methods
	# ------------
	def process_text(text):
	    """Stub: report the received text on the page with st.write."""
	    message = f"process_text called with: {text}"
	    st.write(message)

	def process_text2(text_input):
	    """Stub: return a placeholder string that echoes *text_input*."""
	    template = "[process_text2 placeholder] Received: {}"
	    return template.format(text_input)

	def search_arxiv(text):
	    """Stub: report the received search text on the page with st.write."""
	    message = f"search_arxiv called with: {text}"
	    st.write(message)

	def SpeechSynthesis(text):
	    """Stub: report the text that would be spoken, using st.write."""
	    message = f"SpeechSynthesis called with: {text}"
	    st.write(message)

	def process_image(image_file, prompt):
	    """Stub: return a placeholder string naming the image and the prompt."""
	    template = "[process_image placeholder] Processing {} with prompt: {}"
	    return template.format(image_file, prompt)

	def process_video(video_file, seconds_per_frame):
	    """Stub: report the video name and sampling interval with st.write."""
	    message = f"[process_video placeholder] Video: {video_file}, seconds/frame: {seconds_per_frame}"
	    st.write(message)

	def search_glossary(content):
	    """Stub: report the received glossary query with st.write."""
	    message = f"search_glossary called with: {content}"
	    st.write(message)

	# Inference endpoint settings. Both values are placeholders in this file.
	API_URL = "https://huggingface-inference-endpoint-placeholder"
	# Read the token from the environment when available so a real credential
	# never has to be written into the source; the original placeholder stays
	# as the fallback, so the value is unchanged when HF_TOKEN is not set.
	API_KEY = os.environ.get("HF_TOKEN", "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

	@st.cache_resource
	def InferenceLLM(prompt):
	    """Stub: return a placeholder response string that embeds *prompt*."""
	    template = "[InferenceLLM placeholder response to prompt: {}]"
	    return template.format(prompt)


	# --------------------------------------
	# Display Entities & Glossary Functions
	# --------------------------------------
	@st.cache_resource
	def display_glossary_entity(k):
	    """Render *k* followed by a row of small emoji search links.

	    The first four links point back into this app ("/?q=..."), three of
	    them suffixed with a URL-quoted prompt prefix; the rest go to
	    Wikipedia, Google, Bing, YouTube and Twitter searches for *k*.
	    """
	    encoded = quote(k)
	    link_targets = [
	        ("🚀🌌ArXiv", f"/?q={encoded}"),
	        ("🃏Analyst", f"/?q={encoded}-{quote(PromptPrefix)}"),
	        ("📚PyCoder", f"/?q={encoded}-{quote(PromptPrefix2)}"),
	        ("🔬JSCoder", f"/?q={encoded}-{quote(PromptPrefix3)}"),
	        ("📖", f"https://en.wikipedia.org/wiki/{encoded}"),
	        ("🔍", f"https://www.google.com/search?q={encoded}"),
	        ("🔎", f"https://www.bing.com/search?q={encoded}"),
	        ("🎥", f"https://www.youtube.com/results?search_query={encoded}"),
	        ("🐦", f"https://twitter.com/search?q={encoded}"),
	    ]
	    links_md = ' '.join(f"[{label}]({target})" for label, target in link_targets)
	    st.markdown(f"{k} <small>{links_md}</small>", unsafe_allow_html=True)


	@st.cache_resource
	def display_glossary_grid(roleplaying_glossary):
	    """Render the glossary as one heading per category and one column per game.

	    *roleplaying_glossary* maps category -> game -> list of terms. Every
	    term is written with the same row of emoji search links used for
	    single glossary entities.
	    """

	    def _links_for(term):
	        # Build the markdown link row for one term, in display order.
	        encoded = quote(term)
	        targets = [
	            ("🚀🌌ArXiv", f"/?q={encoded}"),
	            ("🃏Analyst", f"/?q={encoded}-{quote(PromptPrefix)}"),
	            ("📚PyCoder", f"/?q={encoded}-{quote(PromptPrefix2)}"),
	            ("🔬JSCoder", f"/?q={encoded}-{quote(PromptPrefix3)}"),
	            ("📖", f"https://en.wikipedia.org/wiki/{encoded}"),
	            ("🔍", f"https://www.google.com/search?q={encoded}"),
	            ("🔎", f"https://www.bing.com/search?q={encoded}"),
	            ("🎥", f"https://www.youtube.com/results?search_query={encoded}"),
	            ("🐦", f"https://twitter.com/search?q={encoded}"),
	        ]
	        return ' '.join(f"[{label}]({target})" for label, target in targets)

	    for category, details in roleplaying_glossary.items():
	        st.write(f"### {category}")
	        columns = st.columns(len(details))
	        # One column per game, paired in dictionary order.
	        for column, (game, terms) in zip(columns, details.items()):
	            with column:
	                st.markdown(f"#### {game}")
	                for term in terms:
	                    st.markdown(f"{term} <small>{_links_for(term)}</small>", unsafe_allow_html=True)


	# --------------------
	# File-Handling Logic
	# --------------------
	def load_file(file_path):
	    """Return the UTF-8 text of *file_path*, or "" when it cannot be read.

	    Reading is best-effort: a missing or unreadable file (OSError) and
	    undecodable bytes or an invalid path string (ValueError, which covers
	    UnicodeDecodeError) both yield an empty string. Other exceptions are
	    no longer swallowed, so programming errors stay visible.
	    """
	    try:
	        with open(file_path, "r", encoding='utf-8') as f:
	            return f.read()
	    except (OSError, ValueError):
	        return ""

	def create_zip_of_files(files):
	    """Write every path in *files* into a fixed-name zip and return that name.

	    The archive is rebuilt on every call. The previous @st.cache_resource
	    decorator cached on the list of file names only, so editing a file
	    without renaming it kept returning the name of a stale archive.

	    Returns:
	        The archive file name, created in the current working directory.
	    """
	    zip_name = "Arxiv-Paper-Search-QA-RAG-Streamlit-Gradio-AP.zip"
	    with zipfile.ZipFile(zip_name, 'w') as zipf:
	        for file in files:
	            zipf.write(file)
	    return zip_name

	def get_zip_download_link(zip_file):
	    """Return an HTML anchor that embeds *zip_file* as a base64 data URI.

	    The file is read on every call. The previous @st.cache_resource
	    decorator cached on the file name, which is constant in this app, so
	    the link kept serving the first archive ever built.

	    Returns:
	        An ``<a>`` tag whose ``download`` attribute is *zip_file* and whose
	        visible text is "Download All".
	    """
	    with open(zip_file, 'rb') as f:
	        payload = f.read()
	    b64 = base64.b64encode(payload).decode()
	    return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'

	def get_table_download_link(file_path):
	    """Return an HTML download anchor for a text file, or '' if unreadable.

	    The file is read as UTF-8 text and embedded as a base64 data URI. The
	    MIME type is chosen from the file extension and falls back to
	    'application/octet-stream'.

	    Changes from the earlier version:
	      * only read failures (OSError) and decode/path errors (ValueError)
	        produce the empty-string result, instead of a bare ``except``;
	      * the file name is HTML-escaped before being placed in the
	        ``download`` attribute and link text, since names can be chosen
	        by the user through the app's "Save As" inputs.
	    """
	    import html  # local import: not among the file's top-level imports

	    mime_map = {
	        '.txt': 'text/plain',
	        '.py': 'text/plain',
	        '.xlsx': 'text/plain',
	        '.csv': 'text/plain',
	        '.htm': 'text/html',
	        '.md': 'text/markdown',
	        '.wav': 'audio/wav'
	    }
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            data = file.read()
	    except (OSError, ValueError):
	        return ''

	    b64 = base64.b64encode(data.encode()).decode()
	    file_name = os.path.basename(file_path)
	    ext = os.path.splitext(file_name)[1]
	    mime_type = mime_map.get(ext, 'application/octet-stream')
	    safe_name = html.escape(file_name, quote=True)
	    return f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{safe_name}">{safe_name}</a>'

	def get_file_size(file_path):
	    """Return the size of *file_path* in bytes."""
	    file_stat = os.stat(file_path)
	    return file_stat.st_size

	def compare_and_delete_files(files):
	    """Delete byte-identical duplicates among *files*, keeping the newest copy.

	    Files are first grouped by size, then by SHA-256 of their contents.
	    The earlier version treated equal size alone as proof of duplication
	    and could therefore delete files with different contents.

	    Within each group of identical files, the one with the latest
	    modification time is kept and the others are removed, with one
	    st.success message per deletion. A script rerun is requested at the
	    end. An empty *files* list only shows a warning.
	    """
	    import hashlib  # local import: not among the file's top-level imports

	    if not files:
	        st.warning("No files to compare.")
	        return

	    # Cheap first pass: only files of equal size can be identical.
	    by_size = {}
	    for file in files:
	        by_size.setdefault(os.path.getsize(file), []).append(file)

	    for same_size in by_size.values():
	        if len(same_size) < 2:
	            continue
	        # Second pass: confirm equality by content digest.
	        by_digest = {}
	        for path in same_size:
	            with open(path, 'rb') as handle:
	                digest = hashlib.sha256(handle.read()).hexdigest()
	            by_digest.setdefault(digest, []).append(path)
	        for identical in by_digest.values():
	            if len(identical) < 2:
	                continue
	            latest_file = max(identical, key=os.path.getmtime)
	            for path in identical:
	                if path != latest_file:
	                    os.remove(path)
	                    st.success(f"Deleted {path} as a duplicate.")
	    st.rerun()

	def FileSidebar():
	"""
	Renders the file sidebar with open/view/run/delete logic.
	Excludes README.md from the list.

	Lists the *.md files in the current directory (names shorter than five
	characters before the extension are skipped), offers bulk delete and
	zip download, and gives each file a row of action buttons. When one of
	the row buttons loaded a file during this run, the file is shown in the
	main area according to the selected action ('md', 'open' or 'search').
	"""
	all_files = glob.glob("*.md")
	# Exclude README.md
	all_files = [f for f in all_files if f != 'README.md']
	# Filter out short-named files if desired
	all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 5]
	# Sort by (extension, name), descending.
	all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)

	# Bulk actions: delete every listed file, or zip them and show a download link.
	Files1, Files2 = st.sidebar.columns(2)
	with Files1:
	if st.button("🗑 Delete All"):
	for file in all_files:
	os.remove(file)
	st.rerun()
	with Files2:
	if st.button("⬇️ Download"):
	zip_file = create_zip_of_files(all_files)
	st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

	# Locals filled in by whichever per-file button was pressed in this run.
	file_contents = ''
	file_name = ''
	next_action = ''

	# One sidebar row per file: render-as-markdown, download link, open, run, delete.
	for file in all_files:
	col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])
	with col1:
	if st.button("🌐", key="md_"+file):
	file_contents = load_file(file)
	file_name = file
	next_action = 'md'
	st.session_state['next_action'] = next_action
	with col2:
	st.markdown(get_table_download_link(file), unsafe_allow_html=True)
	with col3:
	if st.button("📂", key="open_"+file):
	file_contents = load_file(file)
	file_name = file
	next_action = 'open'
	# The opened file's name and text are also stored in session state.
	# NOTE(review): nothing in this function reads these keys back.
	st.session_state['lastfilename'] = file
	st.session_state['filename'] = file
	st.session_state['filetext'] = file_contents
	st.session_state['next_action'] = next_action
	with col4:
	if st.button("▶️", key="read_"+file):
	file_contents = load_file(file)
	file_name = file
	next_action = 'search'
	st.session_state['next_action'] = next_action
	with col5:
	if st.button("🗑", key="delete_"+file):
	os.remove(file)
	st.rerun()

	# Second listing: one expander per file with its size, a View and a Delete button.
	file_sizes = [get_file_size(file) for file in all_files]
	previous_size = None
	st.sidebar.title("File Operations")
	for file, size in zip(all_files, file_sizes):
	# Flag a file whose size equals that of the previous file in the list.
	duplicate_flag = "🚩" if size == previous_size else ""
	with st.sidebar.expander(f"File: {file} {duplicate_flag}"):
	st.text(f"Size: {size} bytes")
	if st.button("View", key=f"view_{file}"):
	try:
	with open(file, "r", encoding='utf-8') as f:
	file_content = f.read()
	st.code(file_content, language="markdown")
	except UnicodeDecodeError:
	st.error("Failed to decode the file with UTF-8.")
	if st.button("Delete", key=f"delete3_{file}"):
	os.remove(file)
	st.rerun()
	previous_size = size

	# Main-area view for the file selected above; skipped when nothing was loaded
	# (load_file returns "" for unreadable files, which also skips this section).
	if len(file_contents) > 0:
	if next_action == 'open':
	# Editable name and contents with a save button.
	# NOTE(review): next_action is a local reset on every run, so confirm the
	# Save button still reaches this branch on the run its click triggers.
	open1, open2 = st.columns([0.8, 0.2])
	with open1:
	file_name_input = st.text_input('File Name:', file_name, key='file_name_input')
	file_content_area = st.text_area('File Contents:', file_contents, height=300, key='file_content_area')

	if st.button('💾 Save File'):
	with open(file_name_input, 'w', encoding='utf-8') as f:
	f.write(file_content_area)
	st.markdown(f'Saved {file_name_input} successfully.')

	elif next_action == 'search':
	# Show the contents and the refine prompt; the button passes the text to search_arxiv.
	file_content_area = st.text_area("File Contents:", file_contents, height=500)
	user_prompt = PromptPrefix2 + file_contents
	st.markdown(user_prompt)
	if st.button('🔍Re-Code'):
	search_arxiv(file_contents)

	elif next_action == 'md':
	# Render the file as markdown and hand the text to SpeechSynthesis.
	st.markdown(file_contents)
	SpeechSynthesis(file_contents)
	if st.button('🔍Run'):
	st.write("Running GPT logic placeholder...")


	# ---------------------------
	# Basic Scoring / Glossaries
	# ---------------------------
	# Directory holding one JSON score file per button key; created on import
	# if it does not exist yet.
	score_dir = "scores"
	os.makedirs(name=score_dir, exist_ok=True)

	def generate_key(label, header, idx):
	    """Return a widget key of the form "<header>_<label>_<idx>_key"."""
	    return "{}_{}_{}_key".format(header, label, idx)

	def update_score(key, increment=1):
	    """Add *increment* to the stored clicks and score for *key*.

	    The counters live in "<score_dir>/<key>.json". A missing file starts
	    both counters at zero. The updated data is written back and the new
	    score is returned.
	    """
	    score_file = os.path.join(score_dir, f"{key}.json")

	    score_data = {"clicks": 0, "score": 0}
	    if os.path.exists(score_file):
	        with open(score_file, "r") as source:
	            score_data = json.load(source)

	    for counter in ("clicks", "score"):
	        score_data[counter] += increment

	    with open(score_file, "w") as target:
	        json.dump(score_data, target)
	    return score_data["score"]

	def load_score(key):
	    """Return the stored score for *key*, or 0 when no score file exists."""
	    score_file = os.path.join(score_dir, f"{key}.json")
	    if not os.path.exists(score_file):
	        return 0
	    with open(score_file, "r") as source:
	        return json.load(source)["score"]

	def display_buttons_with_scores(num_columns_text):
	    """Render one scoring button per glossary term, labelled with its score.

	    For every category/game/term in the module-level roleplaying_glossary
	    a button is shown. Clicking it increments the stored score and writes
	    a confirmation line.

	    The storage key is now sanitised once ('?' removed) and used for both
	    loading and updating. Previously the score was loaded under the raw
	    key but saved under the sanitised one, so a term containing '?' never
	    displayed its saved score.

	    Args:
	        num_columns_text: accepted for interface compatibility; not used
	            by this function.
	    """
	    game_emojis = {
	        "Dungeons and Dragons": "🐉",
	        "Call of Cthulhu": "🐙",
	        "GURPS": "🎲",
	        "Pathfinder": "🗺️",
	        "Kindred of the East": "🌅",
	        "Changeling": "🍃",
	    }

	    topic_emojis = {
	        "Core Rulebooks": "📚",
	        "Maps & Settings": "🗺️",
	        "Game Mechanics & Tools": "⚙️",
	        "Monsters & Adversaries": "👹",
	        "Campaigns & Adventures": "📜",
	        "Creatives & Assets": "🎨",
	        "Game Master Resources": "🛠️",
	        "Lore & Background": "📖",
	        "Character Development": "🧍",
	        "Homebrew Content": "🔧",
	        "General Topics": "🌍",
	    }

	    for category, games in roleplaying_glossary.items():
	        category_emoji = topic_emojis.get(category, "🔍")
	        st.markdown(f"## {category_emoji} {category}")
	        for game, terms in games.items():
	            game_emoji = game_emojis.get(game, "🎮")
	            for term in terms:
	                # Same key for the widget, the load and the update.
	                key = f"{category}_{game}_{term}".replace(' ', '_').lower().replace('?', '')
	                score = load_score(key)
	                if st.button(f"{game_emoji} {category} {game} {term} {score}", key=key):
	                    newscore = update_score(key)
	                    st.markdown(f"Scored {category} - {game} - {term} -> {newscore}")


	# --------------------
	# Image & Video Grids
	# --------------------
	def display_images_and_wikipedia_summaries(num_columns=4):
	    """Show every PNG in the current directory in a grid with a prompt box.

	    Images are ordered by the length of the file name up to its first dot
	    and placed round-robin into *num_columns* columns. Under each image
	    come the glossary links for that name and a text input; a non-empty
	    prompt is passed to process_image and the response is rendered.

	    Failures while showing one image are reported inline and do not stop
	    the grid. Only ``Exception`` is caught: the previous bare ``except``
	    also intercepted BaseException-derived exceptions, a category that
	    includes interpreter exits and framework control-flow signals.
	    """
	    image_files = [f for f in os.listdir('.') if f.endswith('.png')]
	    if not image_files:
	        st.write("No PNG images found in the current directory.")
	        return

	    image_files_sorted = sorted(image_files, key=lambda x: len(x.split('.')[0]))
	    cols = st.columns(num_columns)

	    for col_index, image_file in enumerate(image_files_sorted):
	        with cols[col_index % num_columns]:
	            try:
	                image = Image.open(image_file)
	                st.image(image, use_column_width=True)
	                k = image_file.split('.')[0]
	                display_glossary_entity(k)
	                image_text_input = st.text_input(f"Prompt for {image_file}", key=f"image_prompt_{image_file}")
	                if len(image_text_input) > 0:
	                    response = process_image(image_file, image_text_input)
	                    st.markdown(response)
	            except Exception:
	                st.write(f"Could not open {image_file}")

	def display_videos_and_links(num_columns=4):
	    """Show every MP4/WEBM in the current directory in a grid with a prompt box.

	    Videos are ordered by the length of the file name up to its first dot
	    and placed round-robin into *num_columns* columns. Entering any text
	    in a video's prompt box calls process_video with a fixed interval of
	    10 seconds per frame.
	    """
	    found = [name for name in os.listdir('.') if name.endswith(('.mp4', '.webm'))]
	    if len(found) == 0:
	        st.write("No MP4 or WEBM videos found in the current directory.")
	        return

	    ordered = sorted(found, key=lambda name: len(name.split('.')[0]))
	    grid = st.columns(num_columns)

	    for position, video_file in enumerate(ordered):
	        with grid[position % num_columns]:
	            stem = video_file.split('.')[0]
	            st.video(video_file, format='video/mp4', start_time=0)
	            display_glossary_entity(stem)
	            video_text_input = st.text_input(f"Video Prompt for {video_file}", key=f"video_prompt_{video_file}")
	            if not video_text_input:
	                continue
	            try:
	                seconds_per_frame = 10
	                process_video(video_file, seconds_per_frame)
	            except ValueError:
	                st.error("Invalid input for seconds per frame!")


	# -------------------------------------
	# Query Param Helpers
	# -------------------------------------
	def get_all_query_params(key):
	    """Return every value given for query parameter *key* as a list.

	    st.query_params.get() yields only a single string for a repeated
	    parameter, so get_all() is used to return all values. A missing key
	    gives an empty list, as before.
	    """
	    return st.query_params.get_all(key)

	def clear_query_params():
	    """Remove all query parameters from the app's URL.

	    The previous body only evaluated ``st.query_params`` as an expression,
	    which has no effect; clear() actually empties the parameters.
	    """
	    st.query_params.clear()

	def display_content_or_image(query):
	    """Show the first glossary term or image matching *query*.

	    The query is matched case-insensitively as a substring of each term in
	    transhuman_glossary; the first hit is displayed. Otherwise
	    "images/<query>.png" is shown if it exists.

	    Two guards are added because the query arrives from the URL:
	      * an empty or blank query no longer matches the first glossary term
	        (the empty string is a substring of everything);
	      * the image lookup is skipped when the query contains a path
	        component, so it cannot point outside the images directory.

	    Returns:
	        True when something was displayed, False otherwise (a warning is
	        shown in that case).
	    """
	    if not query or not query.strip():
	        st.warning("No matching content or image found.")
	        return False

	    needle = query.lower()
	    for category, term_list in transhuman_glossary.items():
	        for term in term_list:
	            if needle in term.lower():
	                st.subheader(f"Found in {category}:")
	                st.write(term)
	                return True

	    # Only a plain file name may select an image.
	    if os.path.basename(query) == query and query not in (".", ".."):
	        image_path = f"images/{query}.png"
	        if os.path.exists(image_path):
	            st.image(image_path, caption=f"Image for {query}")
	            return True

	    st.warning("No matching content or image found.")
	    return False


	# ------------------------------------
	# MERMAID DIAGRAM with Tooltip Fix
	# ------------------------------------
	def generate_mermaid_html(mermaid_code: str) -> str:
	    """Return a standalone HTML page that renders *mermaid_code*.

	    The page loads mermaid from the jsDelivr CDN, places the diagram
	    source inside a centred ``div.mermaid`` and initialises mermaid with
	    ``startOnLoad``. The diagram source is inserted as-is.
	    """
	    page_template = """
	<html>
	<head>
	<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
	<style>
	.centered-mermaid {{
	display: flex;
	justify-content: center;
	margin: 20px auto;
	}}
	.mermaid {{
	max-width: 800px;
	}}
	</style>
	</head>
	<body>
	<div class="mermaid centered-mermaid">
	{mermaid_code}
	</div>
	<script>
	mermaid.initialize({{ startOnLoad: true }});
	</script>
	</body>
	</html>
	"""
	    return page_template.format(mermaid_code=mermaid_code)

	def append_model_param(url: str, model_selected: bool) -> str:
	    """Return *url* with "model=1" appended when *model_selected* is true.

	    "&" is used as the separator if the URL already contains "?",
	    otherwise "?". The URL is returned unchanged when the flag is false.
	    """
	    if model_selected:
	        separator = "?" if "?" not in url else "&"
	        return url + separator + "model=1"
	    return url


	# Matches: click <NodeID> "<URL>" "<Tooltip>" "<Target>"
	_CLICK_LINE_RE = re.compile(r'click\s+(\S+)\s+"([^"]+)"\s+"([^"]+)"\s+"([^"]+)"')


	def _rewrite_click_links(mermaid_source, model_selected):
	    """Return *mermaid_source* with in-app click URLs passed through append_model_param."""

	    def _with_model(match):
	        node, url, tooltip, target = match.groups()
	        return f'click {node} "{append_model_param(url, model_selected)}" "{tooltip}" "{target}"'

	    rewritten = []
	    for line in mermaid_source.strip().split("\n"):
	        # Only click lines whose URL points back into this app ("/?...").
	        if "click " in line and '"/?' in line:
	            line = _CLICK_LINE_RE.sub(_with_model, line)
	        rewritten.append(line)
	    return "\n".join(rewritten)


	def main():
	    """Build the page: diagram, markdown/mermaid editors, media grids, file sidebar.

	    Fixes relative to the earlier version:
	      * ``?q`` / ``?r`` are read as whole strings. st.query_params.get()
	        returns a string, so indexing it with ``[0]`` kept only the first
	        character of the query.
	      * click lines in the diagram are rewritten with a single regex
	        substitution. Previously every matching line was emitted as
	        "click fix: <line>", which corrupted the diagram and ignored the
	        "model" checkbox.
	      * st.rerun() replaces the deprecated st.experimental_rerun().
	    """
	    st.set_page_config(page_title="Mermaid + Clickable Links Demo", layout="wide")

	    # 1) Parse query strings using st.query_params
	    query_params = st.query_params
	    current_q = query_params.get("q", "")
	    current_r = query_params.get("r", "")

	    st.sidebar.write("## Diagram Link Settings")
	    model_selected = st.sidebar.checkbox("Append ?model=1 to each link?")

	    # Load from .md files
	    markdown_default = load_file("MarkdownCode.md")
	    mermaid_default = load_file("MermaidCode.md")

	    # Rebuild for clickable diagram with optional &model=1
	    mermaid_code = _rewrite_click_links(mermaid_default or "", model_selected)

	    st.title("Top-Centered Mermaid Diagram with Clickable Links 🏺")
	    diagram_html = generate_mermaid_html(mermaid_code)
	    components.html(diagram_html, height=400, scrolling=True)

	    # Show inbound ?q or ?r
	    if current_q:
	        st.markdown(f"Detected Query: `?q={current_q}`")
	        display_content_or_image(current_q)
	    if current_r:
	        st.markdown(f"Detected Relationship: `?r={current_r}`")

	    left_col, right_col = st.columns(2)

	    # --- Left: Markdown Editor
	    with left_col:
	        st.subheader("Markdown Side 📝")
	        if "markdown_text" not in st.session_state:
	            st.session_state["markdown_text"] = markdown_default

	        markdown_text = st.text_area(
	            "Edit Markdown:",
	            value=st.session_state["markdown_text"],
	            height=300
	        )
	        st.session_state["markdown_text"] = markdown_text

	        colA, colB, colC, colD = st.columns(4)
	        with colA:
	            if st.button("🔄 Refresh"):
	                st.write("Markdown content refreshed! 🍿")
	        with colB:
	            if st.button("❌ Clear"):
	                st.session_state["markdown_text"] = ""
	                st.rerun()
	        with colC:
	            if st.button("💾 File Save"):
	                with open("MarkdownCode.md", 'w', encoding='utf-8') as f:
	                    f.write(markdown_text)
	                st.success("Saved to MarkdownCode.md")
	        with colD:
	            md_filename = st.text_input("Filename for Markdown:", value="MarkdownCode.md", key="md_filename_key")
	            if st.button("💾 Save As"):
	                with open(md_filename, 'w', encoding='utf-8') as f:
	                    f.write(markdown_text)
	                st.success(f"Saved to {md_filename}")

	        st.markdown("---")
	        st.markdown("Preview:")
	        st.markdown(markdown_text)

	    # --- Right: Mermaid Editor
	    with right_col:
	        st.subheader("Mermaid Side 🧜‍♂️")
	        if "current_mermaid" not in st.session_state:
	            st.session_state["current_mermaid"] = mermaid_default

	        mermaid_input = st.text_area(
	            "Edit Mermaid Code:",
	            value=st.session_state["current_mermaid"],
	            height=300
	        )

	        # NOTE(review): the diagram at the top is built from MermaidCode.md,
	        # not from this session value, so edits show there only after
	        # "File Save" and a rerun — confirm that is intended.
	        colC, colD, colE, colF = st.columns(4)
	        with colC:
	            if st.button("🎨 Refresh"):
	                st.session_state["current_mermaid"] = mermaid_input
	                st.write("Mermaid diagram refreshed! 🌈")
	                st.rerun()
	        with colD:
	            if st.button("❌ Clear "):
	                st.session_state["current_mermaid"] = ""
	                st.rerun()
	        with colE:
	            if st.button("💾 File Save "):
	                with open("MermaidCode.md", 'w', encoding='utf-8') as f:
	                    f.write(mermaid_input)
	                st.success("Saved to MermaidCode.md")
	        with colF:
	            mermaid_filename = st.text_input("Filename for Mermaid:", value="MermaidCode.md", key="mermaid_filename_key")
	            if st.button("💾 Save As "):
	                with open(mermaid_filename, 'w', encoding='utf-8') as f:
	                    f.write(mermaid_input)
	                st.success(f"Saved to {mermaid_filename}")

	        st.markdown("---")
	        st.markdown("Mermaid Source:")
	        st.code(mermaid_input, language="python", line_numbers=True)

	    st.markdown("---")
	    st.header("Media Galleries")

	    num_columns_images = st.slider("Choose Number of Image Columns", 1, 15, 5, key="num_columns_images")
	    display_images_and_wikipedia_summaries(num_columns_images)

	    num_columns_video = st.slider("Choose Number of Video Columns", 1, 15, 5, key="num_columns_video")
	    display_videos_and_links(num_columns_video)

	    # Extended glossary/score interface is switched off by this flag.
	    showExtendedTextInterface = False
	    if showExtendedTextInterface:
	        display_glossary_grid(roleplaying_glossary)
	        num_columns_text = st.slider("Choose Number of Text Columns", 1, 15, 4, key="num_columns_text")
	        display_buttons_with_scores(num_columns_text)
	        st.markdown("Extended text interface is on...")

	    FileSidebar()

	    # Random Title at bottom
	    titles = [
	        "🧠🎭 Semantic Symphonies & Episodic Encores",
	        "🌌🎼 AI Rhythms of Memory Lane",
	        "🎭🎉 Cognitive Crescendos & Neural Harmonies",
	        "🧠🎺 Mnemonic Melodies & Synaptic Grooves",
	        "🎼🎸 Straight Outta Cognition",
	        "🥁🎻 Jazzy Jambalaya of AI Memories",
	        "🏰 Semantic Soul & Episodic Essence",
	        "🥁🎻 The Music Of AI's Mind"
	    ]
	    selected_title = random.choice(titles)
	    st.markdown(f"{selected_title}")

	def new_lines_joiner(prefix_str):
	    """Return *prefix_str* unchanged.

	    Placeholder hook for extracting a node name from the text that
	    precedes a 'click NodeID "URL" "Tooltip" "Target"' pattern; no
	    parsing is performed.
	    """
	    unchanged = prefix_str
	    return unchanged

	# Call main() only when this file is executed as the main script, not
	# when it is imported as a module.
	if __name__ == "__main__":
	main()