yeyefowl / app.py
mgbam's picture
Update app.py
414da85 verified
raw
history blame
24.7 kB
# ----------------------------------------------------------------------------
# PROJECT CHIMERA v2.1: Interactive Symbiotic Discovery Environment
#
# This application is a prototype for an AI-assisted research environment
# designed to tackle humanity's grand challenges. It integrates generative
# AI, interactive simulation, adversarial analysis, and data synthesis
# into a unified, project-based workflow.
#
# Required Libraries:
# pip install gradio numpy pandas matplotlib fpdf2 huggingface_hub tavily-python PyPDF2 python-docx Pillow pytesseract opencv-python scikit-learn
# ----------------------------------------------------------------------------
import os
import re
import json
import uuid
import shutil
import base64
import io
from datetime import datetime
from typing import Dict, List, Optional, Any
import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
from huggingface_hub import InferenceClient
from tavily import TavilyClient
import PyPDF2
import docx
from PIL import Image
import pytesseract
# --- CORE MODULES & CONFIGURATION ---
# The "Soul" of the AI: A multi-persona system prompt
# Maps a persona key to the text that send_to_chimera() places in the 'system'
# message of a chat request (it indexes this dict with `system_prompt_key`).
# Every persona tells the model to answer with a single JSON object and lists
# the top-level keys that object must contain.
# NOTE(review): only the "standard" persona is requested by the code visible in
# this file; "red_team" and "deep_dive" have no visible caller — confirm usage.
PROMETHEUS_SYSTEM_PROMPT = {
# "standard": full solution synthesis, including a "simulationModel" whose
# "pythonCode" must define run_simulation(parameters, output_path).
"standard": """You are Project Chimera, a symbiotic AI discovery engine. Your purpose is to help humanity solve its most pressing and complex challenges. You are a super-intelligent, multi-disciplinary expert in physics, biology, chemistry, engineering, computer science, economics, and ethics. You think in complex systems, identifying leverage points, feedback loops, and unintended consequences. When a user presents a Grand Challenge, your task is to synthesize a multi-faceted solution pathway as a single, clean JSON object. Do not include any text or explanations outside of the JSON structure.
The JSON object must contain the keys: "challengeAnalysis", "solutionPathways", "riskAndEthicalAnalysis", "requiredKnowledgeDomains", "simulationModel".
The "simulationModel" key must contain a "description", a list of "parameters" (each with name, default, type='dict_key'), and "pythonCode" to simulate it using numpy and matplotlib, returning a filepath to the plot. The function signature in the code must be `def run_simulation(parameters: dict, output_path: str):`.
""",
# "red_team": adversarial critique of a proposed solution.
"red_team": """You are the Adversarial Division of Project Chimera, known as "Typhon". Your sole purpose is to "Red Team" a proposed solution. You must be relentlessly critical, skeptical, and creative in your destruction-testing of the idea. Your goal is to find every possible failure mode, negative second-order effect, potential for misuse (dual-use), and systemic vulnerability. Provide your analysis as a single JSON object. Do not include any text or explanations outside of the JSON structure.
The JSON must contain the keys: "vulnerabilityAnalysis", "socioEconomicImpacts", "catastrophicFailureScenarios", "informationHazards".
""",
# "deep_dive": focused analysis of one component picked from a larger analysis.
"deep_dive": """You are the Synthesis Division of Project Chimera. A user has selected a specific component from a larger analysis. Your task is to perform a "deep dive" on this component. Provide a more detailed, granular, and focused analysis. Reference specific scientific principles, existing research, and quantitative estimates where possible. Present your findings as a single JSON object.
The JSON must contain the keys: "componentTitle", "detailedExplanation", "underlyingPrinciples", "keyDataPoints", "nextResearchSteps".
"""
}
# --- Client Initializations ---
# Credentials are read from the environment. A missing HF_TOKEN is passed
# through as api_key=None rather than rejected here.
HF_TOKEN = os.getenv('HF_TOKEN')
client = InferenceClient(provider="auto", api_key=HF_TOKEN, bill_to="huggingface")
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')
# Literature search is optional: without a key the client stays None and
# perform_literature_search() reports that the feature is disabled.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY) if TAVILY_API_KEY else None
# --- Application State & Project Management ---
# Directory that holds one JSON file per saved project.
PROJECTS_DIR = "chimera_projects"
# exist_ok=True replaces the check-then-create sequence, which could raise
# FileExistsError if the directory appeared between the check and the call.
os.makedirs(PROJECTS_DIR, exist_ok=True)
class ProjectState:
    """Container for everything that belongs to one research project.

    A project is persisted as a JSON file inside ``PROJECTS_DIR``; the file
    name is the project name with spaces replaced by underscores.
    """

    def __init__(self, name: str):
        """Start an empty project called *name* with a fresh UUID and timestamp."""
        self.id = str(uuid.uuid4())
        self.name = name
        self.created_at = datetime.now().isoformat()
        self.history = []
        self.analysis_results: Dict[str, Any] = {}
        self.simulations: Dict[str, Any] = {}
        self.notes = ""

    def save(self):
        """Write all instance attributes to this project's JSON file."""
        file_name = f"{self.name.replace(' ', '_')}.json"
        target = os.path.join(PROJECTS_DIR, file_name)
        with open(target, 'w') as handle:
            json.dump(vars(self), handle, indent=2)

    @staticmethod
    def load(filepath: str):
        """Rebuild a project from the JSON file at *filepath*.

        The stored ``name`` seeds a new instance, then every stored key
        overwrites the matching attribute (including ``id`` and
        ``created_at``). Raises ``KeyError`` if the file has no ``name``.
        """
        with open(filepath, 'r') as handle:
            stored = json.load(handle)
        restored = ProjectState(stored['name'])
        vars(restored).update(stored)
        return restored
def get_saved_projects():
    """Return the display names of the projects stored in ``PROJECTS_DIR``.

    A display name is the ``.json`` file's stem with underscores turned back
    into spaces. Only the trailing extension is removed, so a stem that itself
    contains ".json" still maps back to the file it came from. Names are
    returned sorted so the listing order is deterministic.
    """
    names = []
    for entry in os.listdir(PROJECTS_DIR):
        stem, ext = os.path.splitext(entry)
        if ext == '.json':
            names.append(stem.replace('_', ' '))
    return sorted(names)
# --- DATA PROCESSING & ANALYSIS HELPERS ---
def extract_text_from_files(file_list: List[Any]) -> str:
    """Extract text from uploaded files and concatenate it with document markers.

    Args:
        file_list: Uploaded files. Each entry is either a path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute is the path.
            ``None`` or an empty list yields ``""``.

    Returns:
        One string in which every file is wrapped in
        ``--- START OF DOCUMENT`` / ``--- END OF DOCUMENT`` markers. PDF, DOCX,
        TXT and MD files contribute their text, CSV files a pandas summary.
        Unsupported types and per-file failures are reported inline instead of
        raising.
    """
    if not file_list:
        return ""
    parts = []
    for file_obj in file_list:
        # Accept plain paths as well as objects exposing .name, so a path
        # string no longer fails with AttributeError before the try block.
        if isinstance(file_obj, (str, os.PathLike)):
            filepath = os.fspath(file_obj)
        else:
            filepath = file_obj.name
        filename = os.path.basename(filepath)
        parts.append(f"\n\n--- START OF DOCUMENT: {filename} ---\n")
        _, raw_ext = os.path.splitext(filepath)
        # Match case-insensitively so "REPORT.PDF" is handled like "report.pdf".
        ext = raw_ext.lower()
        try:
            if ext == '.pdf':
                with open(filepath, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    parts.append("\n".join(page.extract_text() or "" for page in reader.pages))
            elif ext == '.docx':
                doc = docx.Document(filepath)
                parts.append("\n".join(p.text for p in doc.paragraphs))
            elif ext == '.csv':
                df = pd.read_csv(filepath)
                # Appended piecewise so a failure part-way through still keeps
                # whatever was already summarised.
                parts.append(f"CSV Data Summary for {filename}:\n")
                parts.append(f"Shape: {df.shape}\n")
                parts.append(f"Columns: {df.columns.tolist()}\n")
                parts.append("Head:\n" + df.head().to_string() + "\n")
                parts.append("Description:\n" + df.describe().to_string() + "\n")
            elif ext in ['.txt', '.md']:
                with open(filepath, 'r', encoding='utf-8') as f:
                    parts.append(f.read())
            else:
                parts.append(f"File type {raw_ext} not supported for text extraction.\n")
        except Exception as e:
            # Best-effort: one unreadable file must not abort the whole batch.
            parts.append(f"Error processing {filename}: {e}\n")
        parts.append(f"--- END OF DOCUMENT: {filename} ---\n")
    return "".join(parts)
def perform_literature_search(query: str) -> str:
    """Search a fixed set of scientific domains for *query*.

    Returns a text block listing source URL, title and content for each hit.
    If no Tavily client is configured, or the search raises, a short status
    string is returned instead; the function itself never raises for search
    failures.
    """
    if not tavily_client:
        return "Literature search disabled."
    scholarly_domains = [
        "arxiv.org",
        "nature.com",
        "sciencemag.org",
        "pnas.org",
        "pubmed.ncbi.nlm.nih.gov",
        "sciencedirect.com",
    ]
    try:
        response = tavily_client.search(
            f"Scientific literature review, data, and models for: {query}",
            search_depth="advanced",
            max_results=10,
            include_domains=scholarly_domains,
        )
        entries = []
        for res in response.get('results', []):
            entries.append(f"Source: {res.get('url')}\nTitle: {res.get('title')}\nAbstract: {res.get('content')}")
        return "LITERATURE SEARCH RESULTS:\n" + "\n---\n".join(entries)
    except Exception as e:
        return f"Literature search error: {e}"
def safe_json_parse(text: str) -> Optional[Dict]:
    """Parse a JSON object out of *text*, tolerating a markdown ``json`` fence.

    The candidate is taken from a fenced ```` ```json ```` block or, failing
    that, from the first ``{`` to the last ``}``. It is parsed as-is first.
    Only if that fails are trailing commas before ``}`` / ``]`` stripped and
    the parse retried, so valid JSON whose string values contain ", ]" or
    ", }" (for example generated source code) is returned unmodified.

    Returns:
        The parsed object, or ``None`` when *text* is empty, contains no
        ``{...}`` span, or cannot be decoded even after the cleanup.
    """
    if not text:
        return None
    match = re.search(r'```json\n(\{[\s\S]*?\})\n```|(\{[\s\S]*\})', text, re.DOTALL)
    if not match:
        return None
    json_str = match.group(1) or match.group(2)
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass  # fall through to the lenient trailing-comma repair
    # The repair is regex-based and cannot tell strings from structure, which
    # is why it runs only after the strict parse has failed.
    cleaned_str = re.sub(r',\s*([\}\]])', r'\1', json_str)
    try:
        return json.loads(cleaned_str)
    except json.JSONDecodeError:
        return None
# --- SIMULATION ENGINE ---
def run_simulation(code: str, parameters: Dict) -> Optional[str]:
    """Execute generated simulation code and return the path of its plot.

    WARNING: exec() is used here. In a production environment, this MUST be sandboxed
    (e.g., using Docker containers, gVisor, or a secure execution service).
    The globals dict below only pre-populates names for the generated code; it
    does not restrict builtins or imports and is not a security boundary.

    Args:
        code: Python source expected to define
            ``run_simulation(parameters, output_path)``.
        parameters: Values handed to the generated function.

    Returns:
        ``None`` if *code* is empty or the code ran without producing the
        output file; otherwise the path of a PNG under ``simulation_outputs``.
        When the generated code raises, the PNG is an error plot carrying the
        exception message.
    """
    if not code:
        return None
    sim_id = str(uuid.uuid4())
    output_dir = "simulation_outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"sim_plot_{sim_id}.png")
    # Names made available to the generated code (NOT a sandbox, see above).
    global_scope = {
        'np': np,
        'plt': plt,
        'pd': pd,
        'parameters': parameters,
        'output_path': output_path
    }
    # Remember which pyplot figures already exist so that only figures opened
    # during this run are closed afterwards.
    figures_before = set(plt.get_fignums())
    try:
        # The AI is instructed to generate a function `run_simulation`. We call it.
        full_code = f"{code}\n\nrun_simulation(parameters, output_path)"
        exec(full_code, global_scope)
        if os.path.exists(output_path):
            return output_path
        return None
    except Exception as e:
        print(f"SIMULATION ERROR: {e}")
        # Create an error plot
        fig, ax = plt.subplots()
        ax.text(0.5, 0.5, f"Simulation Failed:\n{e}", ha='center', va='center', wrap=True)
        ax.set_xticks([])
        ax.set_yticks([])
        # Save through the figure itself rather than pyplot's "current figure".
        fig.savefig(output_path)
        plt.close(fig)
        return output_path
    finally:
        # Generated code usually leaves its figures open; pyplot keeps them
        # alive until closed, so repeated runs would otherwise leak memory.
        for fig_num in set(plt.get_fignums()) - figures_before:
            plt.close(fig_num)
# --- REPORTING & EXPORTING ---
class PDF(FPDF):
    """FPDF subclass that adds a centred report title and page number to pages."""

    def header(self):
        """Draw the bold report title, then move down by 20 units."""
        title = 'Project Chimera: Synthesis Report'
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 0, 'C')
        self.ln(20)

    def footer(self):
        """Draw the italic, centred page number 15 units above the page bottom."""
        page_label = f'Page {self.page_no()}'
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, page_label, 0, 0, 'C')
def _pdf_safe(value: Any) -> str:
    """Coerce *value* to text that the built-in Latin-1 PDF fonts can encode."""
    # Characters outside Latin-1 become '?' instead of raising while rendering.
    return str(value).encode('latin-1', 'replace').decode('latin-1')


def _pdf_dict_or_empty(value: Any) -> Dict[str, Any]:
    """Return *value* if it is a dict, else an empty dict (model output varies)."""
    return value if isinstance(value, dict) else {}


def _pdf_paragraph(pdf, text: Any) -> None:
    """Write a wrapped paragraph and put the cursor back at the left margin."""
    pdf.multi_cell(0, 5, _pdf_safe(text))
    # NOTE(review): recent fpdf2 releases appear to leave the cursor at the
    # right edge after multi_cell — confirm against the installed version.
    # Resetting x is a no-op where the cursor already returns to the margin.
    pdf.set_x(pdf.l_margin)


def generate_pdf_report(project: ProjectState) -> str:
    """Generate a PDF report for *project* and return the file path.

    The report holds the project name and generation time, the challenge
    summary, each solution pathway's title and hypothesis, the risk analysis
    as indented JSON and, if any simulations were recorded, their plots on a
    separate page. It is written to ``chimera_reports/<name>_report.pdf`` with
    spaces in the name replaced by underscores.

    Text is reduced to Latin-1 before rendering, sections with an unexpected
    type are treated as empty, and simulations without an existing plot file
    are listed without an image.
    """
    pdf = PDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, _pdf_safe(f"Project: {project.name}"), 0, 1)
    pdf.set_font('Arial', '', 10)
    pdf.cell(0, 10, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 0, 1)
    pdf.ln(10)
    analysis = project.analysis_results
    if analysis:
        # Challenge Analysis
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, "1. Challenge Analysis", 0, 1)
        pdf.set_font('Arial', '', 11)
        challenge = _pdf_dict_or_empty(analysis.get("challengeAnalysis", {}))
        _pdf_paragraph(pdf, challenge.get("summary", "N/A"))
        pdf.ln(5)
        # Solution Pathways
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, "2. Proposed Solution Pathways", 0, 1)
        pathways = analysis.get("solutionPathways", [])
        if not isinstance(pathways, list):
            pathways = []
        for i, p in enumerate(pathways):
            p = _pdf_dict_or_empty(p)
            pdf.set_font('Arial', 'B', 12)
            pdf.cell(0, 10, _pdf_safe(f" 2.{i+1} {p.get('pathwayTitle', 'N/A')}"), 0, 1)
            pdf.set_font('Arial', '', 11)
            _pdf_paragraph(pdf, f" Hypothesis: {p.get('hypothesis', 'N/A')}")
        pdf.ln(5)
        # Risk Analysis
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, "3. Risk & Ethical Analysis", 0, 1)
        pdf.set_font('Arial', '', 11)
        risk_text = json.dumps(analysis.get("riskAndEthicalAnalysis", {}), indent=2)
        _pdf_paragraph(pdf, risk_text)
        pdf.ln(10)
    # Simulation Results
    if project.simulations:
        pdf.add_page()
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(0, 10, "4. Simulation Results", 0, 1)
        for sim_name, sim_data in project.simulations.items():
            pdf.set_font('Arial', 'B', 12)
            pdf.cell(0, 10, _pdf_safe(sim_name), 0, 1)
            # plot_path can be missing or None when a run produced no file;
            # os.path.exists(None) would raise TypeError.
            plot_path = _pdf_dict_or_empty(sim_data).get('plot_path')
            if plot_path and os.path.exists(plot_path):
                pdf.image(plot_path, x=None, y=None, w=180)
                pdf.ln(5)
    report_dir = "chimera_reports"
    os.makedirs(report_dir, exist_ok=True)
    filepath = os.path.join(report_dir, f"{project.name.replace(' ', '_')}_report.pdf")
    # A file name alone selects file output; the 'F' flag is not needed.
    pdf.output(filepath)
    return filepath
# --- AI INTERACTION & CORE LOGIC ---
def send_to_chimera(
    full_prompt: str,
    system_prompt_key: str,
    image: Optional[np.ndarray],
    model_id: str
):
    """Send a prompt to the chat model and stream the growing response text.

    Args:
        full_prompt: User message text.
        system_prompt_key: Key into ``PROMETHEUS_SYSTEM_PROMPT`` selecting the
            system message. An unknown key raises ``KeyError``.
        image: Optional image array; when given it is PNG-encoded and attached
            to the user message as a base64 data URL.
        model_id: Model identifier passed to the inference client.

    Yields:
        The accumulated response after each non-empty content chunk. If the
        request or the stream fails, a single JSON object string with
        ``"error"`` and ``"details"`` keys is yielded instead of raising.
    """
    messages = [
        {'role': 'system', 'content': PROMETHEUS_SYSTEM_PROMPT[system_prompt_key]},
    ]
    if image is not None:
        pil_img = Image.fromarray(image)
        buffer = io.BytesIO()
        pil_img.save(buffer, format="PNG")
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": full_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}
            ]
        })
    else:
        messages.append({'role': 'user', 'content': full_prompt})
    response_content = ""
    try:
        completion = client.chat.completions.create(
            model=model_id, messages=messages, stream=True, max_tokens=8192
        )
        for chunk in completion:
            # A chunk with an empty `choices` list carries no text; indexing
            # it would raise IndexError and discard the response so far.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                response_content += piece
                yield response_content
    except Exception as e:
        # json.dumps escapes quotes and newlines in the message, so the
        # payload is always valid JSON for safe_json_parse() downstream.
        yield json.dumps({"error": "Failed to get response from AI model", "details": str(e)})
def synthesize_solution(
    project_state: ProjectState,
    challenge: str,
    files: List[Any],
    image: Optional[np.ndarray],
    enable_search: bool,
    model_id: str,
    progress=gr.Progress()
):
    """Run the synthesis workflow as a generator of Gradio update dicts.

    Steps: build the prompt from the challenge, optional literature search and
    uploaded files; stream the model response into the raw-output panel; then
    parse the final text. On success the analysis and the exchange are stored
    on the project, the project is saved and the full view is rendered. On a
    parse failure, or when the parsed object has an "error" key, the raw text
    is shown and the Raw Output tab is selected.
    """
    # Without a project there is nothing to attach results to; emit one update
    # so Gradio still receives output on this early exit.
    if not project_state:
        yield {
            project_name_display: gr.Markdown("## ERROR: No project loaded. Please create or load a project first.")
        }
        return

    # Step 1: gather context
    progress(0, desc="Assembling context...")
    file_context = extract_text_from_files(files)
    if enable_search:
        search_context = perform_literature_search(challenge)
    else:
        search_context = ""
    full_prompt = (
        f"GRAND CHALLENGE: {challenge}\n\n"
        f"CONTEXT FROM LITERATURE SEARCH:\n{search_context}\n\n"
        f"CONTEXT FROM UPLOADED DOCUMENTS:\n{file_context}"
    )

    # Step 2: stream the model answer; each chunk is the full text so far
    progress(0.3, desc="Querying Chimera Engine...")
    raw_response = ""
    for raw_response in send_to_chimera(full_prompt, "standard", image, model_id):
        progress(0.6, desc="Receiving synthesis...")
        yield {
            raw_output: gr.Code(value=raw_response, language="json", label="Raw AI Output (Streaming...)"),
        }

    # Step 3: parse the final text and update the project
    progress(0.9, desc="Parsing and rendering results...")
    analysis_data = safe_json_parse(raw_response)
    if not analysis_data or "error" in analysis_data:
        yield {
            main_output_tabs: gr.Tabs(selected=4),  # Switch to raw output tab
            raw_output: gr.Code(value=raw_response, language="text", label="AI Output (JSON Parse Error)")
        }
        return

    project_state.analysis_results = analysis_data
    project_state.history.append({"role": "user", "content": challenge})
    project_state.history.append({"role": "assistant", "content": raw_response})
    project_state.save()
    # Render outputs
    yield render_full_project_view(project_state)
# --- UI RENDERING FUNCTIONS ---
def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* if it is a dict, else an empty dict (model output varies)."""
    return value if isinstance(value, dict) else {}


def _bullet_items(items: Any) -> str:
    """Join *items* into the body of a markdown bullet list, coercing to str."""
    # A bare string or dict is treated as one item, not iterated over.
    if isinstance(items, (str, dict)) or not isinstance(items, (list, tuple)):
        items = [items]
    return '\n- '.join(str(item) for item in items)


def _clickable_mermaid(mermaid_code: Any) -> str:
    """Return the diagram unchanged, followed by one ``click`` line per node.

    Node ids are taken from ``id[`` occurrences in the diagram text. The
    directives are appended rather than substituted into the diagram, because
    replacing ``A[`` with ``click A[ "..."`` breaks the node definition.
    NOTE(review): bracket text inside labels can yield spurious ids — confirm
    against real model output.
    """
    if not isinstance(mermaid_code, str) or not mermaid_code:
        mermaid_code = 'graph TD; A["No Model"];'
    node_ids = list(dict.fromkeys(re.findall(r'(\w+)\[', mermaid_code)))
    click_lines = [f'click {node_id} "javascript:void(0)"' for node_id in node_ids]
    return "\n".join([mermaid_code] + click_lines)


def render_full_project_view(project: ProjectState):
    """Build the Gradio update dict for every output from the project state.

    With no project, all panels are cleared or hidden. Otherwise the challenge
    analysis, solution pathways, risk analysis, simulation code and
    parameters, raw JSON and history are rendered from
    ``project.analysis_results``; the simulation plot is hidden again.
    Sections whose type differs from what is expected are rendered as empty
    or stringified instead of raising.
    """
    if not project:
        return {
            project_name_display: "## No Project Loaded",
            challenge_analysis_output: gr.Markdown(""),
            solution_pathways_output: gr.Markdown(""),
            risk_analysis_output: gr.Markdown(""),
            simulation_code_output: gr.Code(visible=False),
            simulation_params_json: gr.JSON(visible=False),
            simulation_plot_output: gr.Plot(visible=False),
            run_simulation_btn: gr.Button(visible=False),
            history_output: [],
            export_report_btn: gr.Button(visible=False),
            raw_output: gr.Code(value="", language="json")
        }
    analysis = project.analysis_results
    # Tab 1: Challenge Analysis
    ca = _as_dict(analysis.get("challengeAnalysis", {}))
    challenge_md = f"## {ca.get('title', 'N/A')}\n\n{ca.get('summary', 'No summary provided.')}\n\n"
    challenge_md += "### System Model\nClick nodes for deep-dive analysis.\n"
    # Make mermaid nodes clickable (assuming a JS handler exists)
    clickable_mermaid = _clickable_mermaid(ca.get('systemModel', 'graph TD; A["No Model"];'))
    challenge_md += f"```mermaid\n{clickable_mermaid}\n```"
    # Tab 2: Solution Pathways
    pathways = analysis.get("solutionPathways", [])
    if not isinstance(pathways, list):
        pathways = []
    pathways_md = ""
    for p in pathways:
        p = _as_dict(p)
        pathways_md += f"### {p.get('pathwayTitle', 'N/A')}\n"
        pathways_md += f"**Hypothesis:** {p.get('hypothesis', 'N/A')}\n\n"
        pathways_md += "---\n"
    # Tab 3: Risk Analysis
    risk = _as_dict(analysis.get("riskAndEthicalAnalysis", {}))
    # Joined outside the f-string to avoid a backslash inside the braces.
    foreseen_risks_str = _bullet_items(risk.get('foreseenRisks', ['N/A']))
    ethical_considerations_str = _bullet_items(risk.get('ethicalConsiderations', ['N/A']))
    risk_md = (
        f"### Foreseen Risks\n- {foreseen_risks_str}\n\n"
        f"### Ethical Considerations\n- {ethical_considerations_str}\n"
    )
    # Tab 4: Simulation Lab
    sim_model = _as_dict(analysis.get("simulationModel", {}))
    sim_code = sim_model.get("pythonCode", "")
    sim_params = sim_model.get("parameters", [])
    if not isinstance(sim_params, list):
        sim_params = []
    # Entries without a name are skipped; a missing default becomes None.
    params_for_json = {
        p['name']: p.get('default')
        for p in sim_params
        if isinstance(p, dict) and 'name' in p
    }
    return {
        project_name_display: f"## Project: {project.name}",
        challenge_analysis_output: gr.Markdown(challenge_md),
        solution_pathways_output: gr.Markdown(pathways_md),
        risk_analysis_output: gr.Markdown(risk_md),
        simulation_code_output: gr.Code(value=sim_code, language="python", visible=bool(sim_code)),
        simulation_params_json: gr.JSON(value=params_for_json, visible=bool(sim_params)),
        run_simulation_btn: gr.Button(visible=bool(sim_code)),
        simulation_plot_output: gr.Plot(visible=False),  # Reset plot on new synthesis
        raw_output: gr.Code(value=json.dumps(analysis, indent=2), language="json"),
        history_output: project.history if project and hasattr(project, 'history') else [],
        export_report_btn: gr.Button(visible=True)
    }
# --- GRADIO UI DEFINITION ---
# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation had been stripped — confirm the layout against the running app.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css="#json_output { height: 600px; }") as demo:
    # Per-session ProjectState (None until a project is created or loaded).
    project_state = gr.State(None)
    gr.Markdown("# Project Chimera v2.1\n## Interactive Symbiotic Discovery Environment")
    with gr.Row():
        # --- LEFT PANEL: CONTROLS & INPUTS ---
        with gr.Column(scale=2):
            gr.Markdown("### 1. Project Control")
            with gr.Row():
                project_selector = gr.Dropdown(choices=get_saved_projects(), label="Load Project")
                new_project_name = gr.Textbox(label="Or Create New Project")
            create_project_btn = gr.Button("Create / Load Project")
            gr.Markdown("### 2. Define Challenge")
            challenge_input = gr.Textbox(lines=3, label="State the Grand Challenge")
            file_input = gr.File(label="Upload Research Papers / Data (Multi-file)", file_count="multiple")
            image_input = gr.Image(label="Upload Diagram/Image", type="numpy")
            gr.Markdown("### 3. Configure Engine")
            with gr.Accordion("Advanced Configuration", open=False):
                model_selector = gr.Dropdown(
                    ["deepseek-ai/DeepSeek-V3-0324", "Qwen/Qwen3-235B-A22B", "THUDM/GLM-4.1V-9B-Thinking"],
                    value="deepseek-ai/DeepSeek-V3-0324", label="Reasoning Model"
                )
                search_toggle = gr.Checkbox(label="Enable Real-Time Literature Search", value=True)
            gr.Markdown("### 4. Synthesize")
            synthesize_btn = gr.Button("Synthesize Solution", variant="primary")
            export_report_btn = gr.Button("Export Full PDF Report", visible=False)
            download_file_output = gr.File(label="Download Report", visible=False)
        # --- RIGHT PANEL: OUTPUTS & ANALYSIS ---
        with gr.Column(scale=5):
            project_name_display = gr.Markdown("## No Project Loaded")
            with gr.Tabs(elem_id="main_output_tabs") as main_output_tabs:
                with gr.Tab("Challenge Analysis", id=0):
                    challenge_analysis_output = gr.Markdown()
                with gr.Tab("Solution Pathways", id=1):
                    solution_pathways_output = gr.Markdown()
                with gr.Tab("Risk & Ethics", id=2):
                    risk_analysis_output = gr.Markdown()
                with gr.Tab("Simulation Lab", id=3):
                    with gr.Row():
                        with gr.Column(scale=1):
                            simulation_params_json = gr.JSON(label="Simulation Parameters", visible=False)
                            run_simulation_btn = gr.Button("Run Simulation", visible=False)
                            simulation_code_output = gr.Code(language="python", label="Generated Simulation Code", visible=False)
                        with gr.Column(scale=2):
                            simulation_plot_output = gr.Plot(label="Simulation Output", visible=False)
                with gr.Tab("Raw Output", id=4):
                    raw_output = gr.Code(language="json", label="Raw AI JSON Output")
                with gr.Tab("History", id=5):
                    # NOTE(review): project history is a list of role/content
                    # dicts; confirm the installed Gradio Chatbot accepts that
                    # format without an explicit `type` argument.
                    history_output = gr.Chatbot()

    # --- EVENT HANDLERS & APP LOGIC ---
    def create_or_load_project(name, selected):
        """Load the named project if its file exists, otherwise create and save it.

        A typed name takes precedence over the dropdown selection. Returns a
        dict of component updates, so any subset of the outputs may be set.
        """
        proj_name = name if name else selected
        if not proj_name:
            # A dict is returned here as well: a 2-tuple does not match the
            # twelve components registered as outputs for this event.
            return {
                project_state: None,
                project_name_display: "## Please enter a name or select a project.",
            }
        filepath = os.path.join(PROJECTS_DIR, f"{proj_name.replace(' ', '_')}.json")
        if os.path.exists(filepath):
            project = ProjectState.load(filepath)
        else:
            project = ProjectState(proj_name)
            project.save()
        updates = {project_state: project}
        updates.update(render_full_project_view(project))
        return updates

    create_project_btn.click(
        create_or_load_project,
        inputs=[new_project_name, project_selector],
        outputs=[project_state, project_name_display, challenge_analysis_output, solution_pathways_output, risk_analysis_output, simulation_code_output, simulation_params_json, simulation_plot_output, run_simulation_btn, history_output, export_report_btn, raw_output]
    )
    # main_output_tabs is listed because synthesize_solution switches to the
    # Raw Output tab on a parse failure; a component that is updated but not
    # registered as an output is rejected by Gradio.
    synthesis_stream = synthesize_btn.click(
        synthesize_solution,
        inputs=[project_state, challenge_input, file_input, image_input, search_toggle, model_selector],
        outputs=[raw_output, project_name_display, challenge_analysis_output, solution_pathways_output, risk_analysis_output, simulation_code_output, simulation_params_json, run_simulation_btn, simulation_plot_output, history_output, export_report_btn, main_output_tabs]
    )
    # The last yield of a streaming function populates the final values
    synthesis_stream.then(
        lambda: None,  # No-op, just to chain
        None,
        None
    )

    def _figure_from_png(path):
        """Wrap the PNG at *path* in a matplotlib Figure for display in gr.Plot."""
        # Local import: only this helper needs the Figure class. A Figure built
        # directly is not registered with pyplot, so it needs no explicit close.
        from matplotlib.figure import Figure
        image = plt.imread(path)
        height, width = image.shape[:2]
        fig = Figure(figsize=(width / 100, height / 100), dpi=100)
        ax = fig.add_axes([0, 0, 1, 1])
        ax.imshow(image)
        ax.set_axis_off()
        return fig

    def handle_simulation_run(project: ProjectState, params_json: dict):
        """Run the project's generated simulation and show the resulting plot.

        Returns the plot update and the project. Nothing is recorded when
        there is no project, analysis or parameters, or when the run
        produced no plot file.
        """
        if not project or not project.analysis_results or not params_json:
            return gr.Plot(visible=False), project
        sim_code = project.analysis_results.get("simulationModel", {}).get("pythonCode")
        plot_path = run_simulation(sim_code, params_json)
        if not plot_path:
            # run_simulation returns None when no file was written; storing a
            # None path would later break report generation.
            return gr.Plot(visible=False), project
        sim_name = f"Sim_{datetime.now().strftime('%H%M%S')}"
        project.simulations[sim_name] = {"parameters": params_json, "plot_path": plot_path}
        project.save()
        # gr.Plot renders figure objects, not image file paths.
        return gr.Plot(value=_figure_from_png(plot_path), visible=True), project

    run_simulation_btn.click(
        handle_simulation_run,
        inputs=[project_state, simulation_params_json],
        outputs=[simulation_plot_output, project_state]
    )

    def handle_export(project: ProjectState):
        """Generate the PDF report and expose it through the download component."""
        if not project:
            return gr.File(visible=False)
        report_path = generate_pdf_report(project)
        return gr.File(value=report_path, label=f"Download {os.path.basename(report_path)}", visible=True)

    export_report_btn.click(
        handle_export,
        inputs=[project_state],
        outputs=[download_file_output]
    )
if __name__ == "__main__":
    # Script entry point: turn on Gradio's queue, then start the server in
    # debug mode without a public share link.
    queued_app = demo.queue()
    queued_app.launch(debug=True, share=False)