Spaces:
Runtime error
Delete app.py
app.py
DELETED
@@ -1,105 +0,0 @@
-import gradio as gr
-import gym
-import numpy as np
-import random
-import time
-
-# Define the environment
-env = gym.make('FrozenLake-v1', is_slippery=False)
-
-# Initialize Q-table and hyperparameters
-state_space_size = env.observation_space.n
-action_space_size = env.action_space.n
-
-# Initialize a Q-table with zeros
-Q_table = np.zeros((state_space_size, action_space_size))
-
-# Define training parameters
-num_episodes = 10000
-learning_rate = 0.1
-discount_factor = 0.9
-epsilon = 0.95
-epsilon_decay = 0.995
-min_epsilon = 0.01
-
-# Train the Q-learning agent
-def train_agent():
-    global epsilon, Q_table
-
-    for episode in range(num_episodes):
-        state = env.reset()
-        total_reward = 0
-        done = False
-
-        while not done:
-            # Epsilon-greedy policy: choose action with exploration vs exploitation
-            if random.uniform(0, 1) < epsilon:
-                action = env.action_space.sample()  # Explore: Random action
-            else:
-                action = np.argmax(Q_table[state])  # Exploit: Best action from Q-table
-
-            # Take the action and observe the new state and reward
-            next_state, reward, done, _, info = env.step(action)
-            total_reward += reward
-
-            # Update the Q-value for the current state-action pair
-            old_q_value = Q_table[state, action]
-            future_q_value = np.max(Q_table[next_state])
-            new_q_value = old_q_value + learning_rate * (reward + discount_factor * future_q_value - old_q_value)
-            Q_table[state, action] = new_q_value
-
-            # Update state
-            state = next_state
-
-        # Decay epsilon to reduce exploration over time
-        epsilon = max(min_epsilon, epsilon * epsilon_decay)
-
-        # Display progress every 1000 episodes
-        if episode % 1000 == 0:
-            print(f"Episode {episode}/{num_episodes}, Epsilon: {epsilon:.4f}, Total reward: {total_reward}")
-
-    return Q_table
-
-# Define a function to run the agent after training
-def run_agent():
-    state = env.reset()
-    total_reward = 0
-    done = False
-    episode_steps = []
-
-    while not done:
-        # Choose the best action from the Q-table (exploitation)
-        action = np.argmax(Q_table[state])
-        next_state, reward, done, _, info = env.step(action)
-        total_reward += reward
-        episode_steps.append((state, action, reward))
-
-        # Update state
-        state = next_state
-
-        # Pause to allow visualization (optional)
-        time.sleep(0.1)
-
-    return total_reward, episode_steps
-
-# Gradio interface for running the agent
-def gradio_interface():
-    # First, train the agent
-    train_agent()
-
-    # Now run the agent and show results
-    total_reward, steps = run_agent()
-    print("Total Reward from the episode:", total_reward)
-
-    # Generate a visual output
-    return f"Total reward in the final episode: {total_reward}. The agent took {len(steps)} steps."
-
-# Set up Gradio Interface
-iface = gr.Interface(fn=gradio_interface,
-                     inputs=[],  # No inputs required for now
-                     outputs="text",
-                     title="Reinforcement Learning Agent on FrozenLake",
-                     description="This app runs a trained reinforcement learning agent on the FrozenLake-v1 environment. The agent is trained using Q-learning and will try to navigate the lake to reach the goal.")
-
-if __name__ == "__main__":
-    iface.launch()
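The "Runtime error" badge at the top of the page is consistent with a Gym API mismatch in the deleted file, although the Space's logs are not shown here, so this is an inference rather than a confirmed diagnosis. env.step() is unpacked into five values, which is the convention introduced in Gym 0.26 (and kept by Gymnasium), but env.reset() is assigned to a single state variable even though that same API returns an (observation, info) tuple, so the first Q_table[state] lookup would fail in the very first episode. A minimal sketch of the episode loop under that API, with a random policy standing in for the Q-table lookup, looks like this (the Gym version is an assumption, not something recorded in this commit):

import gym

# Assumes Gym >= 0.26 (or Gymnasium), where reset() returns (obs, info)
# and step() returns (obs, reward, terminated, truncated, info).
env = gym.make('FrozenLake-v1', is_slippery=False)

state, _ = env.reset()                  # unpack the (observation, info) tuple
done = False
while not done:
    action = env.action_space.sample()  # placeholder policy for the sketch
    next_state, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated      # stop on goal/hole or on the time limit
    state = next_state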
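Separately, redeploying a fixed version of this app would require the Space to declare its Python dependencies. Judging only from the imports in the deleted file (the repository's actual requirements.txt, if it had one, is not part of this commit), a minimal requirements listing would be:

gradio
gym
numpy

random and time come from the Python standard library and need no entry.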