Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,48 +1,145 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Final major enhancement wave:
|
2 |
+
# - Add dry-run testing of proposed code using `exec()` in isolated namespace.
|
3 |
+
# - Add test result logging to vector memory.
|
4 |
+
# - Add internal test cases for agent_core logic to validate before applying updates.
|
5 |
+
|
6 |
+
# Update app.py to include dry-run simulation before applying changes
|
7 |
+
|
8 |
+
enhanced_app_code = """
|
9 |
import gradio as gr
|
10 |
+
import os
|
11 |
+
import openai
|
12 |
+
import subprocess
|
13 |
+
import json
|
14 |
+
import time
|
15 |
+
from difflib import unified_diff
|
16 |
+
|
17 |
+
# Working files used by the agent; all are bare file names, so they resolve
# against the current working directory.
STATE_FILE = "agent_state.txt"  # not referenced by the functions visible in this file
LOG_FILE = "agent_log.txt"  # appended to by log(), returned whole by read_log()
AGENT_CODE = "agent_core.py"  # script that agent_tick() reads and overwrites
CONFIG_FILE = "config.json"  # not referenced by the functions visible in this file
MEMORY_FILE = "vector_memory.json"  # JSON store that log_memory() appends to
TASKS_FILE = "tasks.json"  # not referenced by the functions visible in this file
|
23 |
+
|
24 |
+
def log(message):
    # Append one line of the form "<time.ctime()>: <message>" to LOG_FILE.
    # The file is opened in append mode, so it is created on first use.
    entry = f"{time.ctime()}: {message}"
    with open(LOG_FILE, "a") as log_file:
        # print() supplies the trailing newline that terminates the entry.
        print(entry, file=log_file)
|
27 |
+
|
28 |
+
def read_log():
    # Return the full contents of LOG_FILE, or the placeholder text
    # "No logs yet." when the file does not exist.
    if not os.path.exists(LOG_FILE):
        return "No logs yet."
    with open(LOG_FILE) as log_file:
        contents = log_file.read()
    return contents
|
33 |
+
|
34 |
+
def save_backup(code):
    # Write `code` to a new file named backup_<YYYYmmdd-HHMMSS>.py in the
    # current working directory and return that file name.
    #
    # Two calls within the same second produce the same name, so the later
    # call overwrites the earlier backup.
    ts = time.strftime("%Y%m%d-%H%M%S")
    backup_path = f"backup_{ts}.py"
    # Force UTF-8: the default is the locale's encoding, which can raise
    # UnicodeEncodeError when the source contains non-ASCII characters.
    with open(backup_path, "w", encoding="utf-8") as f:
        f.write(code)
    return backup_path
|
40 |
+
|
41 |
+
def evaluate_change(old_code, new_code):
    # Ask the "gpt-4" chat model to compare two versions of the agent code.
    #
    # Both versions are embedded verbatim in the prompt. Returns the model's
    # reply with surrounding whitespace stripped, or the string
    # "Evaluation failed: <error>" if anything in the request raises.
    prompt = f'''
Compare the OLD and NEW version of the agent's core logic.
Score how much better the new version is in terms of:
- Intelligence
- Robustness
- Self-Improvement

Return a score from -10 to +10 and a reason.

[OLD CODE]
{old_code}

[NEW CODE]
{new_code}
'''
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=chat_messages,
            temperature=0.3,
        )
        verdict = completion.choices[0].message["content"]
        return verdict.strip()
    except Exception as err:
        # Best effort: the caller only logs this text, so report the failure
        # as a string instead of raising.
        return f"Evaluation failed: {err}"
|
66 |
+
|
67 |
+
def dry_run_test(code):
    # Execute `code` once in a fresh namespace and report whether it raised.
    #
    # Returns "Dry-run test succeeded." when execution completes, otherwise
    # "Dry-run test failed: <error>". Syntax errors are reported the same way
    # because exec() compiles the source before running it.
    #
    # NOTE(security): exec() is not a sandbox. The code runs inside this
    # process with full access to the file system, environment variables and
    # network, so model-generated code is effectively trusted here.
    #
    # A single dict serves as both globals and locals. With separate mappings
    # the script's top-level imports and defs land in the locals mapping,
    # while functions defined by the script resolve names through globals, so
    # any function that used an import or a sibling function raised NameError.
    namespace = {}
    try:
        exec(code, namespace)
        return "Dry-run test succeeded."
    except Exception as e:
        return f"Dry-run test failed: {e}"
|
74 |
+
|
75 |
+
def log_memory(entry):
    # Append {"timestamp": time.ctime(), "thought": entry} to the "memory"
    # list stored in MEMORY_FILE, then rewrite the file with 4-space indent.
    #
    # A missing file, or one that does not contain valid JSON (for example an
    # empty file), is treated as an empty store so the first entry can be
    # recorded; in the invalid-JSON case the old contents are replaced.
    try:
        # Context manager so the read handle is closed before the rewrite.
        with open(MEMORY_FILE) as f:
            memory = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        memory = {}
    if not isinstance(memory, dict):
        memory = {}

    # setdefault creates the list when the "memory" key is absent.
    memory.setdefault("memory", []).append({
        "timestamp": time.ctime(),
        "thought": entry,
    })
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=4)
|
83 |
+
|
84 |
+
def _strip_code_fences(text):
    # Remove a surrounding Markdown code fence (``` or ```python) from `text`.
    # Text that does not start with a fence is returned unchanged.
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    # Drop the opening fence line, including any language tag on it.
    lines = stripped.splitlines(keepends=True)[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "".join(lines)


def agent_tick():
    # Run one self-update cycle and return the full log text.
    #
    # Steps:
    #   1. Read the current script from AGENT_CODE.
    #   2. Ask the "gpt-4" chat model for an improved full script.
    #   3. Accept the reply only if it starts with "import", once any
    #      surrounding Markdown fence is removed.
    #   4. Dry-run the candidate; on failure, log it and skip the update.
    #   5. On success: request an evaluation report, back up the current code,
    #      overwrite AGENT_CODE, and run `git add .` / `git commit`.
    #
    # The evaluation report is only logged; it does not gate the update.
    # Any exception raised in steps 2-5 is logged and not re-raised.
    log("Agent tick started.")
    with open(AGENT_CODE, "r") as f:
        current_code = f.read()

    prompt = f'''
You are a recursive agent that improves itself.

Improve the following Python code to make it more intelligent, autonomous, and safe.
Return ONLY the improved full Python script.

{current_code}
'''

    openai.api_key = os.getenv("OPENAI_API_KEY")
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.5
        )
        # Chat models often wrap code in a Markdown fence even when asked for
        # code only; without unwrapping, such replies were always rejected.
        improved_code = _strip_code_fences(response.choices[0].message["content"])

        if improved_code.strip().startswith("import"):
            dry_result = dry_run_test(improved_code)
            log(f"Dry-run result: {dry_result}")
            log_memory(f"Dry-run: {dry_result}")
            if "succeeded" in dry_result:
                score_report = evaluate_change(current_code, improved_code)
                log(f"Eval: {score_report}")
                log_memory(f"Eval: {score_report}")
                save_backup(current_code)
                with open(AGENT_CODE, "w") as f:
                    f.write(improved_code)
                # check=False: a failing git command does not abort the update.
                subprocess.run(["git", "add", "."], check=False)
                subprocess.run(["git", "commit", "-m", "Auto-update by EvolvAI"], check=False)
                log("Applied improved code.")
            else:
                log("Dry-run failed. Skipping update.")
        else:
            log("Malformed GPT output. Skipping update.")
    except Exception as e:
        log(f"Error during update: {e}")
        # Memory logging may be the very thing that failed; do not let a
        # second failure escape the handler and hide the log from the caller.
        try:
            log_memory(f"Update error: {e}")
        except Exception as memory_error:
            log(f"Memory logging failed: {memory_error}")

    return read_log()
|
130 |
+
|
131 |
+
def stop_agent():
    # Record a manual-stop entry in the log and return the full log text.
    # This function only writes the log entry; it does not itself halt any
    # running work.
    log("Agent manually stopped.")
    current_log = read_log()
    return current_log
|
134 |
+
|
135 |
+
# Gradio front end: a read-only style log box plus two buttons. Each button
# calls its handler and shows the returned log text in the box.
with gr.Blocks() as demo:
    # The heading emoji was mojibake ("π€"), the residue of a UTF-8 emoji
    # decoded with the wrong charset; restored as the robot emoji.
    gr.Markdown("# 🤖 EvolvAI v2: Self-Evolving Agent UI")
    log_display = gr.Textbox(label="Log Output", lines=20)
    start_btn = gr.Button("Run Self-Update")
    stop_btn = gr.Button("Stop Agent")

    # Both handlers take no inputs and return the full log text.
    start_btn.click(agent_tick, outputs=log_display)
    stop_btn.click(stop_agent, outputs=log_display)

# Starts the web server as soon as this code is executed.
demo.launch()
|
145 |
+
"""
|