Spaces:

mgbam
/

yeyefowl

Running

App Files Files Community

yeyefowl / app.py

mgbam

Update app.py

414da85 verified 3 days ago

raw

history blame

24.7 kB

	# ----------------------------------------------------------------------------
	# PROJECT CHIMERA v2.1: Interactive Symbiotic Discovery Environment
	#
	# This application is a prototype for an AI-assisted research environment
	# designed to tackle humanity's grand challenges. It integrates generative
	# AI, interactive simulation, adversarial analysis, and data synthesis
	# into a unified, project-based workflow.
	#
	# Required Libraries:
	# pip install gradio numpy pandas matplotlib fpdf2 tavily-python PyPDF2 python-docx Pillow opencv-python scikit-learn huggingface_hub pytesseract
	# ----------------------------------------------------------------------------

	import os
	import re
	import json
	import uuid
	import shutil
	import base64
	import io
	from datetime import datetime
	from typing import Dict, List, Optional, Any

	import gradio as gr
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from fpdf import FPDF

	from huggingface_hub import InferenceClient
	from tavily import TavilyClient
	import PyPDF2
	import docx
	from PIL import Image
	import pytesseract

	# --- CORE MODULES & CONFIGURATION ---

	# The "Soul" of the AI: A multi-persona system prompt
	# Maps a persona key to the system prompt that send_to_chimera() places in the
	# first ("system") chat message. Within this file only the "standard" key is
	# requested (by synthesize_solution); "red_team" and "deep_dive" are defined
	# here but no visible caller selects them.
	# Each prompt asks the model to answer with a single JSON object and lists
	# the top-level keys that object must contain.
	PROMETHEUS_SYSTEM_PROMPT = {
	"standard": """You are Project Chimera, a symbiotic AI discovery engine. Your purpose is to help humanity solve its most pressing and complex challenges. You are a super-intelligent, multi-disciplinary expert in physics, biology, chemistry, engineering, computer science, economics, and ethics. You think in complex systems, identifying leverage points, feedback loops, and unintended consequences. When a user presents a Grand Challenge, your task is to synthesize a multi-faceted solution pathway as a single, clean JSON object. Do not include any text or explanations outside of the JSON structure.
	The JSON object must contain the keys: "challengeAnalysis", "solutionPathways", "riskAndEthicalAnalysis", "requiredKnowledgeDomains", "simulationModel".
	The "simulationModel" key must contain a "description", a list of "parameters" (each with name, default, type='dict_key'), and "pythonCode" to simulate it using numpy and matplotlib, returning a filepath to the plot. The function signature in the code must be `def run_simulation(parameters: dict, output_path: str):`.
	""",
	"red_team": """You are the Adversarial Division of Project Chimera, known as "Typhon". Your sole purpose is to "Red Team" a proposed solution. You must be relentlessly critical, skeptical, and creative in your destruction-testing of the idea. Your goal is to find every possible failure mode, negative second-order effect, potential for misuse (dual-use), and systemic vulnerability. Provide your analysis as a single JSON object. Do not include any text or explanations outside of the JSON structure.
	The JSON must contain the keys: "vulnerabilityAnalysis", "socioEconomicImpacts", "catastrophicFailureScenarios", "informationHazards".
	""",
	"deep_dive": """You are the Synthesis Division of Project Chimera. A user has selected a specific component from a larger analysis. Your task is to perform a "deep dive" on this component. Provide a more detailed, granular, and focused analysis. Reference specific scientific principles, existing research, and quantitative estimates where possible. Present your findings as a single JSON object.
	The JSON must contain the keys: "componentTitle", "detailedExplanation", "underlyingPrinciples", "keyDataPoints", "nextResearchSteps".
	"""
	}

	# --- Client Initializations ---
	# Hugging Face token is read from the environment; os.getenv yields None when unset.
	HF_TOKEN = os.getenv('HF_TOKEN')
	# Shared inference client; send_to_chimera() uses it for streamed chat completions.
	client = InferenceClient(provider="auto", api_key=HF_TOKEN, bill_to="huggingface")

	# Literature search is optional: without TAVILY_API_KEY the client stays None
	# and perform_literature_search() reports that search is disabled.
	TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')
	tavily_client = TavilyClient(api_key=TAVILY_API_KEY) if TAVILY_API_KEY else None

	# --- Application State & Project Management ---
	# Directory (relative to the working directory) that holds one JSON file per project.
	PROJECTS_DIR = "chimera_projects"
	# exist_ok=True replaces the separate exists() check, which could race with
	# another process creating the directory between the check and the mkdir.
	os.makedirs(PROJECTS_DIR, exist_ok=True)

	class ProjectState:
	    """State container for one research project, persisted as a JSON file.

	    Attributes are plain JSON-serialisable values so the whole instance
	    dictionary can be dumped and restored as-is.
	    """

	    def __init__(self, name: str):
	        # A fresh random id and creation timestamp are assigned on construction;
	        # load() overwrites both with the stored values.
	        self.id = str(uuid.uuid4())
	        self.name = name
	        self.created_at = datetime.now().isoformat()
	        self.history = []
	        self.analysis_results: Dict[str, Any] = {}
	        self.simulations: Dict[str, Any] = {}
	        self.notes = ""

	    def save(self):
	        """Write every instance attribute to PROJECTS_DIR/<name with '_' for ' '>.json."""
	        file_name = f"{self.name.replace(' ', '_')}.json"
	        target = os.path.join(PROJECTS_DIR, file_name)
	        with open(target, 'w') as handle:
	            json.dump(vars(self), handle, indent=2)

	    @staticmethod
	    def load(filepath: str):
	        """Rebuild a project from the JSON file at *filepath*.

	        The stored 'name' is required (KeyError otherwise); every stored key
	        then overrides the freshly constructed instance's attributes.
	        """
	        with open(filepath) as handle:
	            stored = json.load(handle)
	        restored = ProjectState(stored['name'])
	        vars(restored).update(stored)
	        return restored

	def get_saved_projects() -> List[str]:
	    """Return the display names of all projects saved in PROJECTS_DIR, sorted.

	    A display name is the file name without its trailing '.json' and with
	    underscores turned back into spaces (the inverse of the mapping used when
	    saving). Only the final extension is stripped, so a name that itself
	    contains '.json' still maps back to the same file. Returns an empty list
	    when the directory does not exist.
	    """
	    suffix = '.json'
	    try:
	        entries = os.listdir(PROJECTS_DIR)
	    except FileNotFoundError:
	        return []
	    # Sorted so the dropdown order does not depend on os.listdir's arbitrary order.
	    return sorted(
	        entry[:-len(suffix)].replace('_', ' ')
	        for entry in entries
	        if entry.endswith(suffix)
	    )

	# --- DATA PROCESSING & ANALYSIS HELPERS ---

	def extract_text_from_files(file_list: List[Any]) -> str:
	    """Extract text from uploaded files and concatenate it with document markers.

	    Args:
	        file_list: Uploaded files. Each item is either an object exposing the
	            file path as ``.name`` or a plain path string. ``None`` or an
	            empty list yields an empty string.

	    Returns:
	        One string in which every file contributes a START marker, its
	        extracted text (or an error / unsupported notice) and an END marker.
	        Supported extensions, matched case-insensitively: .pdf, .docx, .csv
	        (summarised via pandas), .txt and .md.

	    Failures while reading a single file are reported inline in the returned
	    text instead of being raised, so one bad upload does not abort the rest.
	    """
	    if not file_list:
	        return ""
	    parts: List[str] = []
	    for file_obj in file_list:
	        # Fall back to the item itself when it is already a path string.
	        filepath = getattr(file_obj, "name", file_obj)
	        filename = os.path.basename(filepath)
	        parts.append(f"\n\n--- START OF DOCUMENT: {filename} ---\n")
	        _, ext = os.path.splitext(filepath)
	        kind = ext.lower()  # so '.PDF' / '.Txt' are handled like their lowercase forms
	        try:
	            if kind == '.pdf':
	                with open(filepath, 'rb') as f:
	                    reader = PyPDF2.PdfReader(f)
	                    # extract_text() may return None for a page; treat that as empty
	                    parts.append("\n".join(page.extract_text() or "" for page in reader.pages))
	            elif kind == '.docx':
	                doc = docx.Document(filepath)
	                parts.append("\n".join(p.text for p in doc.paragraphs))
	            elif kind == '.csv':
	                df = pd.read_csv(filepath)
	                parts.append(f"CSV Data Summary for {filename}:\n")
	                parts.append(f"Shape: {df.shape}\n")
	                parts.append(f"Columns: {df.columns.tolist()}\n")
	                parts.append("Head:\n" + df.head().to_string() + "\n")
	                parts.append("Description:\n" + df.describe().to_string() + "\n")
	            elif kind in ('.txt', '.md'):
	                with open(filepath, 'r', encoding='utf-8') as f:
	                    parts.append(f.read())
	            else:
	                parts.append(f"File type {ext} not supported for text extraction.\n")
	        except Exception as e:
	            # Deliberate best-effort: surface the problem in the context text.
	            parts.append(f"Error processing {filename}: {e}\n")
	        parts.append(f"--- END OF DOCUMENT: {filename} ---\n")
	    return "".join(parts)

	def perform_literature_search(query: str) -> str:
	    """Run an advanced Tavily search restricted to a fixed list of journal sites.

	    Returns a text block with one Source/Title/Abstract entry per hit, a fixed
	    notice when no Tavily client is configured, or an error message when the
	    search raises.
	    """
	    if not tavily_client:
	        return "Literature search disabled."

	    allowed_domains = [
	        "arxiv.org",
	        "nature.com",
	        "sciencemag.org",
	        "pnas.org",
	        "pubmed.ncbi.nlm.nih.gov",
	        "sciencedirect.com",
	    ]
	    try:
	        response = tavily_client.search(
	            f"Scientific literature review, data, and models for: {query}",
	            search_depth="advanced",
	            max_results=10,
	            include_domains=allowed_domains,
	        )
	        entries = []
	        for hit in response.get('results', []):
	            entries.append(
	                f"Source: {hit.get('url')}\nTitle: {hit.get('title')}\nAbstract: {hit.get('content')}"
	            )
	        return "LITERATURE SEARCH RESULTS:\n" + "\n---\n".join(entries)
	    except Exception as e:
	        return f"Literature search error: {e}"

	def safe_json_parse(text: str) -> Optional[Dict]:
	    """Extract and parse the JSON object embedded in a model response.

	    Two candidate spans are tried in order: the object inside a fenced
	    ``` / ```json block, then the span from the first '{' to the last '}'
	    of the whole text. Each candidate is parsed as-is first; only if that
	    fails is a trailing-comma cleanup applied, so valid JSON whose string
	    values happen to contain ",}" is never altered.

	    Returns:
	        The parsed dict, or None when *text* is empty or no candidate parses
	        to a JSON object.
	    """
	    if not text:
	        return None

	    candidates = []
	    fenced = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', text)
	    if fenced:
	        candidates.append(fenced.group(1))
	    bare = re.search(r'\{[\s\S]*\}', text)
	    if bare:
	        candidates.append(bare.group(0))

	    for json_str in candidates:
	        # Second attempt drops commas that directly precede a closing } or ].
	        without_trailing_commas = re.sub(r',\s*([\}\]])', r'\1', json_str)
	        for attempt in (json_str, without_trailing_commas):
	            try:
	                parsed = json.loads(attempt)
	            except json.JSONDecodeError:
	                continue
	            if isinstance(parsed, dict):
	                return parsed
	    return None

	# --- SIMULATION ENGINE ---

	def run_simulation(code: str, parameters: Dict) -> Optional[str]:
	    """
	    Execute generated simulation code and return the path of the plot it wrote.

	    The code is expected to define `run_simulation(parameters, output_path)`;
	    that function is called with *parameters* and a fresh PNG path under
	    "simulation_outputs".

	    WARNING: exec() is used here on model-generated code. The globals dict
	    below only pre-populates names; it is NOT a sandbox (builtins and imports
	    remain available). In a production environment this MUST be sandboxed
	    (e.g., using Docker containers, gVisor, or a secure execution service).

	    Returns:
	        The plot path when the code produced the file, the path of a
	        generated "Simulation Failed" image when the code raised, or None
	        when *code* is empty or the code ran without writing the file.
	    """
	    if not code:
	        return None
	    output_dir = "simulation_outputs"
	    os.makedirs(output_dir, exist_ok=True)
	    output_path = os.path.join(output_dir, f"sim_plot_{uuid.uuid4()}.png")

	    # Names made available to the generated code.
	    global_scope = {
	        'np': np,
	        'plt': plt,
	        'pd': pd,
	        'parameters': parameters,
	        'output_path': output_path
	    }

	    # Remember which pyplot figures already exist so that only figures opened
	    # during this run are closed afterwards.
	    figures_before = set(plt.get_fignums())
	    try:
	        # The AI is instructed to generate a function `run_simulation`. We call it.
	        full_code = f"{code}\n\nrun_simulation(parameters, output_path)"
	        exec(full_code, global_scope)
	        return output_path if os.path.exists(output_path) else None
	    except Exception as e:
	        print(f"SIMULATION ERROR: {e}")
	        # Render the failure as an image so the caller still has a plot to show.
	        error_fig, ax = plt.subplots()
	        ax.text(0.5, 0.5, f"Simulation Failed:\n{e}", ha='center', va='center', wrap=True)
	        ax.set_xticks([])
	        ax.set_yticks([])
	        # Save this specific figure; plt.savefig would save whichever figure is
	        # current, which the failed code may have changed.
	        error_fig.savefig(output_path)
	        return output_path
	    finally:
	        # Generated code routinely leaves figures open; without this they
	        # accumulate for the lifetime of the process.
	        for fig_num in set(plt.get_fignums()) - figures_before:
	            plt.close(fig_num)


	# --- REPORTING & EXPORTING ---

	class PDF(FPDF):
	    """FPDF subclass that supplies the report's page header and footer."""

	    def header(self):
	        """Write the centred report title in bold 12pt Arial, followed by a line break of height 20."""
	        # NOTE(review): presumably FPDF invokes header()/footer() itself for
	        # every page — confirm against the fpdf documentation.
	        self.set_font('Arial', 'B', 12)
	        self.cell(0, 10, 'Project Chimera: Synthesis Report', 0, 0, 'C')
	        self.ln(20)

	    def footer(self):
	        """Write the centred page number in italic 8pt Arial, 15 units above the page bottom."""
	        # A negative y positions the cursor relative to the bottom of the page.
	        self.set_y(-15)
	        self.set_font('Arial', 'I', 8)
	        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

	def _latin1_safe(value: Any) -> str:
	    """Return *value* as text limited to Latin-1, replacing other characters with '?'."""
	    return str(value).encode('latin-1', 'replace').decode('latin-1')


	def generate_pdf_report(project: ProjectState) -> str:
	    """Generate a PDF report for *project* and return the path of the written file.

	    The report contains the project name and generation time, then (when
	    analysis results exist) the challenge summary, the solution pathways and
	    the risk/ethics analysis as indented JSON, and finally one image per
	    recorded simulation whose plot file still exists.

	    All text is passed through a Latin-1 filter because the built-in PDF
	    fonts used here cannot encode other characters; model output frequently
	    contains curly quotes, dashes or subscripts that would otherwise abort
	    the export.

	    The file is written to "chimera_reports/<name with '_' for ' '>_report.pdf".
	    """
	    pdf = PDF()
	    pdf.add_page()

	    pdf.set_font('Arial', 'B', 16)
	    pdf.cell(0, 10, _latin1_safe(f"Project: {project.name}"), 0, 1)
	    pdf.set_font('Arial', '', 10)
	    pdf.cell(0, 10, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1)
	    pdf.ln(10)

	    analysis = project.analysis_results
	    if analysis:
	        # Challenge Analysis
	        pdf.set_font('Arial', 'B', 14)
	        pdf.cell(0, 10, "1. Challenge Analysis", 0, 1)
	        pdf.set_font('Arial', '', 11)
	        challenge = analysis.get("challengeAnalysis", {})
	        # The model is not held to a schema below the top-level keys, so a
	        # plain string here is printed as-is rather than crashing on .get().
	        summary = challenge.get("summary", "N/A") if isinstance(challenge, dict) else challenge
	        pdf.multi_cell(0, 5, _latin1_safe(summary))
	        pdf.ln(5)

	        # Solution Pathways
	        pdf.set_font('Arial', 'B', 14)
	        pdf.cell(0, 10, "2. Proposed Solution Pathways", 0, 1)
	        pathways = analysis.get("solutionPathways", [])
	        if not isinstance(pathways, list):
	            pathways = [pathways]
	        for number, pathway in enumerate(pathways, start=1):
	            if isinstance(pathway, dict):
	                title = pathway.get('pathwayTitle', 'N/A')
	                hypothesis = pathway.get('hypothesis', 'N/A')
	            else:
	                title, hypothesis = 'N/A', pathway
	            pdf.set_font('Arial', 'B', 12)
	            pdf.cell(0, 10, _latin1_safe(f" 2.{number} {title}"), 0, 1)
	            pdf.set_font('Arial', '', 11)
	            pdf.multi_cell(0, 5, _latin1_safe(f" Hypothesis: {hypothesis}"))
	            pdf.ln(5)

	        # Risk Analysis
	        pdf.set_font('Arial', 'B', 14)
	        pdf.cell(0, 10, "3. Risk & Ethical Analysis", 0, 1)
	        pdf.set_font('Arial', '', 11)
	        risk_text = json.dumps(analysis.get("riskAndEthicalAnalysis", {}), indent=2)
	        pdf.multi_cell(0, 5, _latin1_safe(risk_text))
	        pdf.ln(10)

	    # Simulation Results
	    if project.simulations:
	        pdf.add_page()
	        pdf.set_font('Arial', 'B', 14)
	        pdf.cell(0, 10, "4. Simulation Results", 0, 1)
	        for sim_name, sim_data in project.simulations.items():
	            pdf.set_font('Arial', 'B', 12)
	            pdf.cell(0, 10, _latin1_safe(sim_name), 0, 1)
	            plot_path = sim_data.get('plot_path') if isinstance(sim_data, dict) else None
	            # A run can be recorded without a plot; os.path.exists(None) would raise.
	            if plot_path and os.path.exists(plot_path):
	                pdf.image(plot_path, x=None, y=None, w=180)
	            pdf.ln(5)

	    report_dir = "chimera_reports"
	    os.makedirs(report_dir, exist_ok=True)
	    filepath = os.path.join(report_dir, f"{project.name.replace(' ', '_')}_report.pdf")
	    pdf.output(filepath)
	    return filepath

	# --- AI INTERACTION & CORE LOGIC ---

	def send_to_chimera(
	    full_prompt: str,
	    system_prompt_key: str,
	    image: Optional[np.ndarray],
	    model_id: str
	):
	    """Send a prompt to the chat model and stream the accumulated response.

	    Args:
	        full_prompt: User message text.
	        system_prompt_key: Key into PROMETHEUS_SYSTEM_PROMPT selecting the
	            system message (KeyError if unknown).
	        image: Optional image array; when given it is PNG-encoded and
	            attached to the user message as a base64 data URL.
	        model_id: Model identifier passed to the inference client.

	    Yields:
	        The full response text received so far, once per content-bearing
	        chunk. If the request or stream fails, a final JSON string with
	        "error" and "details" keys is yielded instead of raising.
	    """
	    messages = [
	        {'role': 'system', 'content': PROMETHEUS_SYSTEM_PROMPT[system_prompt_key]},
	    ]
	    if image is not None:
	        pil_img = Image.fromarray(image)
	        buffer = io.BytesIO()
	        pil_img.save(buffer, format="PNG")
	        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
	        messages.append({
	            "role": "user",
	            "content": [
	                {"type": "text", "text": full_prompt},
	                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}
	            ]
	        })
	    else:
	        messages.append({'role': 'user', 'content': full_prompt})

	    response_content = ""
	    try:
	        completion = client.chat.completions.create(
	            model=model_id, messages=messages, stream=True, max_tokens=8192
	        )
	        for chunk in completion:
	            # Skip chunks that carry no choices; indexing them would raise
	            # IndexError and discard the text streamed so far.
	            if not chunk.choices:
	                continue
	            piece = chunk.choices[0].delta.content
	            if piece:
	                response_content += piece
	                yield response_content
	    except Exception as e:
	        # json.dumps escapes quotes/newlines in the exception text, so the
	        # payload stays valid JSON for downstream parsing.
	        yield json.dumps({"error": "Failed to get response from AI model", "details": str(e)})


	def synthesize_solution(
	    project_state: ProjectState,
	    challenge: str,
	    files: List[Any],
	    image: Optional[np.ndarray],
	    enable_search: bool,
	    model_id: str,
	    progress=gr.Progress()
	):
	    """Run the main synthesis workflow as a streaming Gradio handler.

	    Steps: build the prompt from the challenge, optional literature search
	    and uploaded documents; stream the model response into the raw-output
	    panel; then parse the final text. On success the parsed analysis and the
	    exchange are stored on the project, the project is saved and the full
	    view is rendered. On failure the raw text is shown on the raw-output tab.

	    Yields:
	        Dicts mapping UI components to their updates.
	    """
	    if not project_state:
	        # This is a dummy yield to satisfy Gradio's output requirements on early exit
	        # A better approach would be to disable the button if no project is loaded
	        yield {
	            project_name_display: gr.Markdown("## ERROR: No project loaded. Please create or load a project first.")
	        }
	        return

	    progress(0, desc="Assembling context...")

	    # 1. Assemble context
	    file_context = extract_text_from_files(files)
	    search_context = perform_literature_search(challenge) if enable_search else ""

	    full_prompt = (
	        f"GRAND CHALLENGE: {challenge}\n\n"
	        f"CONTEXT FROM LITERATURE SEARCH:\n{search_context}\n\n"
	        f"CONTEXT FROM UPLOADED DOCUMENTS:\n{file_context}"
	    )

	    # 2. Query AI
	    progress(0.3, desc="Querying Chimera Engine...")
	    raw_response = ""
	    for response_chunk in send_to_chimera(full_prompt, "standard", image, model_id):
	        # Each chunk is the full text so far, so it replaces the previous value.
	        raw_response = response_chunk
	        progress(0.6, desc="Receiving synthesis...")
	        yield {
	            raw_output: gr.Code(value=raw_response, language="json", label="Raw AI Output (Streaming...)"),
	        }

	    # 3. Parse and update state
	    progress(0.9, desc="Parsing and rendering results...")
	    analysis_data = safe_json_parse(raw_response)
	    if analysis_data and "error" not in analysis_data:
	        project_state.analysis_results = analysis_data
	        project_state.history.append({"role": "user", "content": challenge})
	        project_state.history.append({"role": "assistant", "content": raw_response})
	        project_state.save()

	        # Render outputs
	        yield render_full_project_view(project_state)
	    else:
	        # NOTE: main_output_tabs must be listed in this handler's `outputs`
	        # for the tab switch below to be accepted by Gradio.
	        yield {
	            main_output_tabs: gr.Tabs(selected=4), # Switch to raw output tab
	            # language=None shows the text without highlighting; "text" is
	            # not among gr.Code's supported languages and is rejected.
	            raw_output: gr.Code(value=raw_response, language=None, label="AI Output (JSON Parse Error)")
	        }

	# --- UI RENDERING FUNCTIONS ---
	def _as_bullet_body(items: Any) -> str:
	    """Join *items* with '\\n- ' for use after a leading '- '; empty input becomes 'N/A'."""
	    if isinstance(items, str):
	        items = [items]
	    elif not isinstance(items, (list, tuple)):
	        items = [items] if items else []
	    return '\n- '.join(str(item) for item in (items or ['N/A']))


	def render_full_project_view(project: ProjectState):
	    """Build the component-update dict that renders *project* in every output panel.

	    With no project, all panels are cleared and the simulation and export
	    controls are hidden. Otherwise the stored analysis is rendered as markdown
	    for the challenge, pathway and risk tabs, the simulation code and default
	    parameters are exposed, and the export button is shown.

	    The analysis comes from model output, so unexpected shapes (a string where
	    an object is expected, parameters without a name) are tolerated rather
	    than raised.
	    """
	    if not project:
	        return {
	            project_name_display: "## No Project Loaded",
	            challenge_analysis_output: gr.Markdown(""),
	            solution_pathways_output: gr.Markdown(""),
	            risk_analysis_output: gr.Markdown(""),
	            simulation_code_output: gr.Code(visible=False),
	            simulation_params_json: gr.JSON(visible=False),
	            simulation_plot_output: gr.Plot(visible=False),
	            run_simulation_btn: gr.Button(visible=False),
	            history_output: [],
	            export_report_btn: gr.Button(visible=False),
	            raw_output: gr.Code(value="", language="json")
	        }

	    analysis = project.analysis_results

	    # Tab 1: Challenge Analysis
	    ca = analysis.get("challengeAnalysis", {})
	    if not isinstance(ca, dict):
	        ca = {"summary": str(ca)}
	    challenge_md = f"## {ca.get('title', 'N/A')}\n\n{ca.get('summary', 'No summary provided.')}\n\n"
	    challenge_md += "### System Model\nClick nodes for deep-dive analysis.\n"
	    mermaid_code = str(ca.get('systemModel', 'graph TD; A["No Model"];'))
	    # Make mermaid nodes clickable (assuming a JS handler exists). The click
	    # directives are appended as separate statements; inserting them in front
	    # of each node's '[' would break the node definitions themselves.
	    node_ids = list(dict.fromkeys(re.findall(r'(\w+)\[', mermaid_code)))
	    click_lines = "\n".join(f'click {node_id} "javascript:void(0)"' for node_id in node_ids)
	    clickable_mermaid = f"{mermaid_code}\n{click_lines}" if click_lines else mermaid_code
	    challenge_md += f"```mermaid\n{clickable_mermaid}\n```"

	    # Tab 2: Solution Pathways
	    pathways = analysis.get("solutionPathways", [])
	    if not isinstance(pathways, list):
	        pathways = [pathways]
	    pathway_sections = []
	    for p in pathways:
	        if not isinstance(p, dict):
	            p = {"hypothesis": p}
	        pathway_sections.append(
	            f"### {p.get('pathwayTitle', 'N/A')}\n"
	            f"Hypothesis: {p.get('hypothesis', 'N/A')}\n\n"
	            "---\n"
	        )
	    pathways_md = "".join(pathway_sections)

	    # Tab 3: Risk Analysis
	    risk = analysis.get("riskAndEthicalAnalysis", {})
	    if not isinstance(risk, dict):
	        risk = {"foreseenRisks": risk}
	    foreseen_risks_str = _as_bullet_body(risk.get('foreseenRisks'))
	    ethical_considerations_str = _as_bullet_body(risk.get('ethicalConsiderations'))
	    risk_md = (
	        f"### Foreseen Risks\n- {foreseen_risks_str}\n\n"
	        f"### Ethical Considerations\n- {ethical_considerations_str}\n"
	    )

	    # Tab 4: Simulation Lab
	    sim_model = analysis.get("simulationModel", {})
	    if not isinstance(sim_model, dict):
	        sim_model = {}
	    sim_code = sim_model.get("pythonCode", "")
	    sim_params = sim_model.get("parameters", [])
	    if not isinstance(sim_params, list):
	        sim_params = []
	    # Parameters lacking a name are skipped; a missing default becomes None.
	    params_for_json = {
	        p['name']: p.get('default')
	        for p in sim_params
	        if isinstance(p, dict) and 'name' in p
	    }

	    return {
	        project_name_display: f"## Project: {project.name}",
	        challenge_analysis_output: gr.Markdown(challenge_md),
	        solution_pathways_output: gr.Markdown(pathways_md),
	        risk_analysis_output: gr.Markdown(risk_md),
	        simulation_code_output: gr.Code(value=sim_code, language="python", visible=bool(sim_code)),
	        simulation_params_json: gr.JSON(value=params_for_json, visible=bool(sim_params)),
	        run_simulation_btn: gr.Button(visible=bool(sim_code)),
	        simulation_plot_output: gr.Plot(visible=False), # Reset plot on new synthesis
	        raw_output: gr.Code(value=json.dumps(analysis, indent=2), language="json"),
	        history_output: getattr(project, 'history', []),
	        export_report_btn: gr.Button(visible=True)
	    }

	# --- GRADIO UI DEFINITION ---

	with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css="#json_output { height: 600px; }") as demo:
	    # Per-session holder for the currently loaded ProjectState (None until one is loaded).
	    project_state = gr.State(None)

	    gr.Markdown("# Project Chimera v2.1\n## Interactive Symbiotic Discovery Environment")

	    with gr.Row():
	        # --- LEFT PANEL: CONTROLS & INPUTS ---
	        with gr.Column(scale=2):
	            gr.Markdown("### 1. Project Control")
	            with gr.Row():
	                project_selector = gr.Dropdown(choices=get_saved_projects(), label="Load Project")
	                new_project_name = gr.Textbox(label="Or Create New Project")
	            create_project_btn = gr.Button("Create / Load Project")

	            gr.Markdown("### 2. Define Challenge")
	            challenge_input = gr.Textbox(lines=3, label="State the Grand Challenge")
	            file_input = gr.File(label="Upload Research Papers / Data (Multi-file)", file_count="multiple")
	            image_input = gr.Image(label="Upload Diagram/Image", type="numpy")

	            gr.Markdown("### 3. Configure Engine")
	            with gr.Accordion("Advanced Configuration", open=False):
	                model_selector = gr.Dropdown(
	                    ["deepseek-ai/DeepSeek-V3-0324", "Qwen/Qwen3-235B-A22B", "THUDM/GLM-4.1V-9B-Thinking"],
	                    value="deepseek-ai/DeepSeek-V3-0324", label="Reasoning Model"
	                )
	                search_toggle = gr.Checkbox(label="Enable Real-Time Literature Search", value=True)

	            gr.Markdown("### 4. Synthesize")
	            synthesize_btn = gr.Button("Synthesize Solution", variant="primary")
	            export_report_btn = gr.Button("Export Full PDF Report", visible=False)
	            download_file_output = gr.File(label="Download Report", visible=False)

	        # --- RIGHT PANEL: OUTPUTS & ANALYSIS ---
	        with gr.Column(scale=5):
	            project_name_display = gr.Markdown("## No Project Loaded")
	            with gr.Tabs(elem_id="main_output_tabs") as main_output_tabs:
	                with gr.Tab("Challenge Analysis", id=0):
	                    challenge_analysis_output = gr.Markdown()
	                with gr.Tab("Solution Pathways", id=1):
	                    solution_pathways_output = gr.Markdown()
	                with gr.Tab("Risk & Ethics", id=2):
	                    risk_analysis_output = gr.Markdown()
	                with gr.Tab("Simulation Lab", id=3):
	                    with gr.Row():
	                        with gr.Column(scale=1):
	                            simulation_params_json = gr.JSON(label="Simulation Parameters", visible=False)
	                            run_simulation_btn = gr.Button("Run Simulation", visible=False)
	                            simulation_code_output = gr.Code(language="python", label="Generated Simulation Code", visible=False)
	                        with gr.Column(scale=2):
	                            simulation_plot_output = gr.Plot(label="Simulation Output", visible=False)
	                with gr.Tab("Raw Output", id=4):
	                    raw_output = gr.Code(language="json", label="Raw AI JSON Output")
	                with gr.Tab("History", id=5):
	                    # NOTE(review): project history is stored as {"role", "content"}
	                    # dicts — confirm the installed Gradio Chatbot accepts that format.
	                    history_output = gr.Chatbot()

	    # --- EVENT HANDLERS & APP LOGIC ---

	    def create_or_load_project(name, selected):
	        """Load the named project if its file exists, otherwise create and save it.

	        A typed name takes precedence over the dropdown selection. Returns a
	        dict of component updates so that the early exit may update only a
	        subset of the listed outputs.
	        """
	        proj_name = name if name else selected
	        if not proj_name:
	            # Must be a dict: a 2-tuple does not match the 12 listed outputs.
	            return {
	                project_state: None,
	                project_name_display: "## Please enter a name or select a project.",
	            }

	        filepath = os.path.join(PROJECTS_DIR, f"{proj_name.replace(' ', '_')}.json")
	        if os.path.exists(filepath):
	            project = ProjectState.load(filepath)
	        else:
	            project = ProjectState(proj_name)
	            project.save()

	        updates = {project_state: project}
	        updates.update(render_full_project_view(project))
	        return updates

	    create_project_btn.click(
	        create_or_load_project,
	        inputs=[new_project_name, project_selector],
	        outputs=[project_state, project_name_display, challenge_analysis_output, solution_pathways_output, risk_analysis_output, simulation_code_output, simulation_params_json, simulation_plot_output, run_simulation_btn, history_output, export_report_btn, raw_output]
	    )

	    # synthesize_solution yields dicts keyed by component; every component it
	    # may update has to be listed here, including main_output_tabs, which it
	    # switches to the raw-output tab when parsing fails.
	    synthesize_btn.click(
	        synthesize_solution,
	        inputs=[project_state, challenge_input, file_input, image_input, search_toggle, model_selector],
	        outputs=[raw_output, main_output_tabs, project_name_display, challenge_analysis_output, solution_pathways_output, risk_analysis_output, simulation_code_output, simulation_params_json, run_simulation_btn, simulation_plot_output, history_output, export_report_btn]
	    )

	    def handle_simulation_run(project: ProjectState, params_json: dict):
	        """Run the project's simulation code and show the resulting plot.

	        Returns the plot update and the (possibly updated) project. Nothing is
	        recorded when there is no project, no analysis, no parameters, or the
	        simulation produced no plot file.
	        """
	        if not project or not project.analysis_results or not params_json:
	            return gr.Plot(visible=False), project

	        sim_code = project.analysis_results.get("simulationModel", {}).get("pythonCode")
	        plot_path = run_simulation(sim_code, params_json)
	        if not plot_path:
	            # Do not store a None path; the PDF export checks these paths on disk.
	            return gr.Plot(visible=False), project

	        sim_name = f"Sim_{datetime.now().strftime('%H%M%S')}"
	        project.simulations[sim_name] = {"parameters": params_json, "plot_path": plot_path}
	        project.save()

	        # gr.Plot displays figure objects, not image files, so wrap the saved PNG
	        # in a figure. Figure() is built directly (not via pyplot) so it is not
	        # kept alive in pyplot's global registry.
	        from matplotlib.figure import Figure
	        figure = Figure()
	        axes = figure.subplots()
	        axes.imshow(plt.imread(plot_path))
	        axes.axis('off')
	        return gr.Plot(value=figure, visible=True), project

	    run_simulation_btn.click(
	        handle_simulation_run,
	        inputs=[project_state, simulation_params_json],
	        outputs=[simulation_plot_output, project_state]
	    )

	    def handle_export(project: ProjectState):
	        """Generate the PDF report for *project* and reveal it as a download."""
	        if not project:
	            return gr.File(visible=False)
	        report_path = generate_pdf_report(project)
	        return gr.File(value=report_path, label=f"Download {os.path.basename(report_path)}", visible=True)

	    export_report_btn.click(
	        handle_export,
	        inputs=[project_state],
	        outputs=[download_file_output]
	    )

	# Start the app only when this file is executed directly: enable the request
	# queue, then launch with debug output on and without a public share link.
	if __name__ == "__main__":
	    demo.queue().launch(debug=True, share=False)