# algoforge_prime/app.py
import gradio as gr
import os
# Initialize core components first (important for loading API keys etc.)
# This needs to happen before other core modules try to use the status.
from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
initialize_clients()  # Explicitly initialize
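# Assumed contract (not verified here): initialize_clients() reads GOOGLE_API_KEY and
# HF_TOKEN from the environment / Space secrets and sets the module-level booleans
# GEMINI_API_CONFIGURED / HF_API_CONFIGURED, which are checked below when building
# AVAILABLE_MODELS and the status banner.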
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
# from prompts.system_prompts import get_system_prompt  # Might not be needed directly here if core modules handle it
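# Assumed EvaluationResult interface (defined in core.evaluation_engine): the orchestration
# below reads .score, .critique, .passed_tests, .total_tests and .execution_error, and for
# Genesis errors constructs it with only score/critique, which assumes the remaining fields
# have sensible defaults (e.g. 0/0 tests, no execution error).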
# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY:
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
if not AVAILABLE_MODELS:
    AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
    DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY:
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
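# Illustrative helper (hypothetical name, not wired into the pipeline): the UI below asks for
# one `assert` statement per line in the unit-test box, and core.evaluation_engine is assumed
# to do the real parsing/execution. This sketch only shows how that convention could be
# counted, e.g. for logging.
def _count_user_asserts(user_tests_string: str) -> int:
    return sum(
        1
        for line in (user_tests_string or "").splitlines()
        if line.strip().startswith("assert")
    )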
# --- Main Orchestration Logic ---
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string,  # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress bar
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]
    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""
    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")
    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}
    for i, sol_text in enumerate(initial_solution_texts):  # Evaluate all, even errors, to show the error
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if sol_text.startswith("ERROR"):
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error:
                log_entries.append(f"  Execution Error: {eval_res.execution_error}")
        log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })
    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )
    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from genesis before sorting by score
    valid_evaluated_candidates = [cand for cand in evaluated_candidates_data if not cand["solution_text"].startswith("ERROR")]
    if not valid_evaluated_candidates:
        return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""
    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")
    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )
    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique),  # Pass the full critique including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")
    evolved_solution_display_md = ""
    final_thoughts_md = ""  # For LLM explanation of unit test results if needed
    if evolved_solution_text.startswith("ERROR"):
        evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
    else:
        evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided
        if "python" in problem_type.lower() and user_tests_string:
            progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
            log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
            # Using a neutral LLM config for this, or it could be separate.
            # This evaluation is primarily for the test results, not another LLM critique of the evolved code.
            evolved_eval_res = evaluate_solution_candidate(
                evolved_solution_text, problem_description, problem_type, user_tests_string,
                {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens}  # Low temp for focused test eval
            )
            evolved_solution_display_md += (
                f"\n\n**Post-Evolution Test Results (Simulated):**\n"
                f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
            )
            if evolved_eval_res.execution_error:
                evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
            log_entries.append(f"  Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")
            # Get LLM to explain the test results of the evolved code
            # progress(0.95, desc="Explaining Evolved Code Test Results...")
            # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
            # explain_sys_prompt = get_system_prompt("code_execution_explainer")
            # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config)  # Need a simplified dispatcher or use the full one
            # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"
    log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
    progress(1.0, desc="Cycle Complete!")
    return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
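# --- Illustrative sketch (not used by this app) ---
# The (simulated) unit testing above is delegated to core.evaluation_engine. As a rough,
# assumption-laden sketch of what actually executing a candidate plus the user's asserts
# could look like, the helper below (hypothetical name `_run_asserts_in_subprocess`) writes
# both to a temp file and runs it with a timeout. NOTE: a subprocess with a timeout is NOT
# a real sandbox; see the disclaimer in the UI below.
import subprocess
import sys
import tempfile

def _run_asserts_in_subprocess(candidate_code: str, user_tests_string: str, timeout_s: int = 5):
    """Returns (all_asserts_passed: bool, combined_output: str)."""
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(candidate_code + "\n\n" + user_tests_string + "\n")
        path = f.name
    try:
        proc = subprocess.run(
            [sys.executable, path], capture_output=True, text=True, timeout=timeout_s
        )
        return proc.returncode == 0, (proc.stdout + proc.stderr)
    except subprocess.TimeoutExpired:
        return False, f"Timed out after {timeout_s}s"
    finally:
        os.remove(path)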
# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
featuring basic (simulated) unit testing for Python code.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)
    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
        gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2):  # Input column made wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for user tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_select_dd = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
                    label="Select LLM Core Model"
                )
                num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")  # Max 3 for faster runs
                with gr.Accordion("Advanced LLM Parameters", open=False):
                    # ... (temp and max_tokens sliders - same as before) ...
                    with gr.Row():
                        gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
                        gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
                    with gr.Row():
                        eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
                        eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
                    with gr.Row():
                        evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
                submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
            with gr.Column(scale=3):  # Output column made wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests")  # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown()  # Placeholder for final_thoughts_md if you add it as a separate component
        ]
        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb,  # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )
    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Real sandboxing is complex and critical for safety."
    )
# --- Entry Point ---
if __name__ == "__main__":
    print("=" * 80)
    print("AlgoForge Prime™ (Modular Version) Starting...")
    # ... (startup print messages for API key status - same as before) ...
    print(f"UI default model key: {DEFAULT_MODEL_KEY}")
    print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
    print("=" * 80)
    demo.launch(debug=True, server_name="0.0.0.0")