# algoforge_prime/app.py
import gradio as gr
import os
import time # For progress updates
from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
from prompts.system_prompts import get_system_prompt
from prompts.prompt_templates import format_code_test_analysis_user_prompt
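
# NOTE: assumed wiring — initialize_all_clients is imported above but never invoked in this
# file; a no-argument call here (signature assumed) is a sketch of the intended setup step
# before the GEMINI_API_CONFIGURED / HF_API_CONFIGURED flags are consulted below.
initialize_all_clients()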
# --- Application Configuration (Models, Defaults) ---
AVAILABLE_MODELS_CONFIG = {}
UI_DEFAULT_MODEL_KEY = None
# Populate with Gemini models if API is configured
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

# Populate with Hugging Face models if API is configured
if HF_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
        "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"},  # Smaller CodeLlama
    })
    if not UI_DEFAULT_MODEL_KEY:  # If Gemini isn't configured, default to an HF model
        UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

# Absolute fallback if no models could be configured
if not AVAILABLE_MODELS_CONFIG:
    print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
    AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
    UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
    UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]  # Pick first available if default somehow not set
# --- Main Orchestration Logic for Gradio ---
def run_algoforge_simulation_orchestrator(
    problem_type_selected: str,
    problem_description_text: str,
    initial_hints_text: str,
    user_provided_tests_code: str,
    num_initial_solutions_to_gen: int,
    selected_model_ui_key: str,
    genesis_temp: float, genesis_max_tokens: int,
    critique_temp: float, critique_max_tokens: int,
    evolution_temp: float, evolution_max_tokens: int,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress tracker
):
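    """Run one full AlgoForge Prime™ cycle: genesis, critique/evaluation, champion selection, evolution.

    Returns five strings consumed by the Gradio outputs, in order: initial candidates with
    evaluations (markdown), champion candidate (markdown), evolved artifact (markdown),
    the interaction log, and the AI analysis of the evolved code's test results.
    """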
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
    start_time = time.time()

    # Basic input validation
    if not problem_description_text.strip():
        error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""  # Return 5 values for outputs

    current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
    if not current_model_config or current_model_config["type"] == "none":
        error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""

    log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type_selected}")
    log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")

    # Prepare LLM configurations for each stage
    llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
    llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
    llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
    log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
    initial_raw_solutions = generate_initial_solutions(
        problem_description_text, initial_hints_text, problem_type_selected,
        num_initial_solutions_to_gen, llm_config_genesis
    )
    log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
    for i, sol_text in enumerate(initial_raw_solutions):
        log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")

    # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
    evaluated_candidates_list = []  # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
    for i, candidate_solution_text in enumerate(initial_raw_solutions):
        current_progress = 0.3 + ((i + 1) / num_initial_solutions_to_gen) * 0.35  # Progress for evaluation stage
        progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
        log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
        # The evaluation_engine handles the case where candidate_solution_text itself is an error string
        evaluation_obj = evaluate_solution_candidate(  # type: EvaluationResult
            candidate_solution_text, problem_description_text, problem_type_selected,
            user_provided_tests_code, llm_config_critique
        )
        log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
        log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
        if evaluation_obj.execution_summary:
            log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
        log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
        evaluated_candidates_list.append({
            "id": i + 1,
            "solution_text": candidate_solution_text,  # Store original text, even if it was an error from genesis
            "evaluation_result": evaluation_obj
        })

    # Format display for initial solutions & evaluations
    initial_solutions_display_markdown = []
    for data in evaluated_candidates_list:
        initial_solutions_display_markdown.append(
            f"**Candidate {data['id']}:**\n"
            f"```python\n{data['solution_text']}\n```\n\n"  # Assuming python for display; adjust if problem_type varies widely
            f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
            f"{data['evaluation_result'].critique_text}\n---"
        )

    # --- STAGE 3: SELECTION OF CHAMPION ---
    progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
    log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
    # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., a score of 0).
    # We want a champion that is actually a piece of code/algorithm, not an error message.
    potentially_viable_candidates = [
        cand for cand in evaluated_candidates_list
        if cand["evaluation_result"] and cand["evaluation_result"].score > 0
        and cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
    ]
    if not potentially_viable_candidates:
        final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
        log_entries.append(f" CRITICAL: {final_error_msg}")
        return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""

    potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
    champion_candidate_data = potentially_viable_candidates[0]
    log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
                       f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
                       f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
    champion_display_markdown = (
        f"**Champion Candidate ID: {champion_candidate_data['id']} "
        f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
        f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
        f"**Original Comprehensive Evaluation for this Champion:**\n"
        f"{champion_candidate_data['evaluation_result'].critique_text}"
    )

    # --- STAGE 4: EVOLUTIONARY FORGE ---
    progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
    evolved_solution_code = evolve_solution(
        champion_candidate_data["solution_text"],
        champion_candidate_data["evaluation_result"].critique_text,  # Pass the full critique
        champion_candidate_data["evaluation_result"].score,
        problem_description_text,
        problem_type_selected,
        llm_config_evolution
    )
    log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")

    evolved_solution_display_markdown = ""
    ai_test_analysis_markdown = ""  # For LLM explanation of unit test results of evolved code
    if evolved_solution_code.startswith("ERROR"):
        evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
    else:
        evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
        if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
            progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
            log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
            # Use a low temperature for this critique to focus on test results rather than creative critique.
            # The critique part here is mostly for consistency; the primary goal is test execution.
            evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
            evolved_code_eval_result = evaluate_solution_candidate(  # type: EvaluationResult
                evolved_solution_code, problem_description_text, problem_type_selected,
                user_provided_tests_code, evolved_critique_config
            )
            evolved_solution_display_markdown += (
                f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
                f"{evolved_code_eval_result.execution_summary}\n"
                f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
            )
            log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
                               f"Summary: {evolved_code_eval_result.execution_summary}")

            # Have the LLM explain the test results of the evolved code
            if evolved_code_eval_result.total_tests > 0:  # Only if tests were run
                progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
                log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
                analysis_user_prompt = format_code_test_analysis_user_prompt(
                    evolved_solution_code,
                    user_provided_tests_code,
                    evolved_code_eval_result.execution_summary  # Pass the summary string
                )
                analysis_system_prompt = get_system_prompt("code_execution_explainer")
                # Use a config for analysis - can be the same as critique or specialized
                llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
                                       "temp": 0.3, "max_tokens": critique_max_tokens + 100}  # A bit more tokens for explanation

                from core.llm_clients import call_huggingface_api, call_gemini_api  # Re-import for clarity or use a dispatcher
                explanation_response_obj = None
                if llm_analysis_config["type"] == "hf":
                    explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
                elif llm_analysis_config["type"] == "google_gemini":
                    explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)

                if explanation_response_obj and explanation_response_obj.success:
                    ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
                    log_entries.append(f" AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
                elif explanation_response_obj:
                    ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
                    log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")

    total_time = time.time() - start_time
    log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
    progress(1.0, desc="Cycle Complete!")

    return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
# --- Gradio UI Definition ---
# (This section is largely similar to the previous app.py, with updated input/output connections)
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
featuring (simulated) unit testing for Python code when tests are provided.
**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
If keys are missing or invalid, corresponding models will be unavailable.
"""
# Determine API status for UI message
ui_token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN is configured or working correctly.** The application will not be able to call any LLMs.</p>"
else:
    if GEMINI_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
    if HF_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
    gr.Markdown(intro_markdown)
    gr.HTML(ui_token_status_md)

    if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
        gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
    else:
        with gr.Row():
            # Input Column
            with gr.Column(scale=2):  # Input column slightly wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dropdown = gr.Dropdown(
                    choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
                    label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
                    info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
                )
                problem_description_textbox = gr.Textbox(
                    lines=5, label="Problem Description / Desired Outcome",
                    placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
                )
                initial_hints_textbox = gr.Textbox(
                    lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
                    placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
                )
                user_tests_textbox = gr.Textbox(
                    lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
                    info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
                )

                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_selection_dropdown = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS_CONFIG.keys()),
                    value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
                    label="Select LLM Core Model",
                    info="Ensure the corresponding API key (Google or HF) is configured in secrets."
                )
                num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")

                with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
                    with gr.Row():
                        genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.")  # Gemini range often 0-1
                        genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
                    with gr.Row():
                        critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
                        critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
                    with gr.Row():
                        evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")

                engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")

            # Output Column
            with gr.Column(scale=3):  # Output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs(elem_id="output_tabs_elem"):
                    with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
                        output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
                        output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
                        output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
                    with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
                        output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")

        # Connect button to the orchestration function
        engage_button.click(
            fn=run_algoforge_simulation_orchestrator,
            inputs=[
                problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
                num_initial_solutions_slider, model_selection_dropdown,
                genesis_temp_slider, genesis_max_tokens_slider,
                critique_temp_slider, critique_max_tokens_slider,
                evolution_temp_slider, evolution_max_tokens_slider
            ],
            outputs=[
                output_initial_solutions_markdown, output_champion_markdown,
                output_evolved_markdown, output_interaction_log_markdown,
                output_ai_test_analysis_markdown
            ]
        )

    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** This is a conceptual, educational demonstration. "
        "The (simulated) unit testing feature is for illustrative purposes. "
        "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
        "LLM outputs always require careful human review and verification."
    )
# --- Entry Point for Running the Gradio App ---
if __name__ == "__main__":
print("="*80)
print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
print(f" Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
print(f" Hugging Face API Configured: {HF_API_CONFIGURED}")
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
print(" CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
print("="*80)
app_demo.launch(debug=True, server_name="0.0.0.0") # server_name="0.0.0.0" is often needed for Docker/Spaces