willco-afk committed
Commit da358c2 · verified · 1 parent: e7ad764

Delete app.py

Files changed (1)
  1. app.py +0 -119
app.py DELETED
@@ -1,119 +0,0 @@
- import gradio as gr
- import gym
- import numpy as np
- import random
- import time
-
- # Define the environment
- env = gym.make('FrozenLake-v1', is_slippery=False)
-
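- # With is_slippery=False the environment is deterministic: each action moves the
- # agent in the chosen direction, so the learned greedy policy follows a fixed path.
-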
- # Initialize Q-table and hyperparameters
- state_space_size = env.observation_space.n
- action_space_size = env.action_space.n
-
- # Initialize a Q-table with zeros
- Q_table = np.zeros((state_space_size, action_space_size))
-
- # Define training parameters
- num_episodes = 10000
- learning_rate = 0.1
- discount_factor = 0.9
- epsilon = 0.95
- epsilon_decay = 0.995
- min_epsilon = 0.01
-
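- # The training loop below applies the standard tabular Q-learning update:
- #   Q(s, a) += learning_rate * (reward + discount_factor * max_a' Q(s', a') - Q(s, a))
-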
- # Train the Q-learning agent
- def train_agent():
-     global epsilon, Q_table
-
-     for episode in range(num_episodes):
-         state, _ = env.reset()  # reset() returns (observation, info) in the current gym API
-         total_reward = 0
-         done = False
-
-         while not done:
-             # Epsilon-greedy policy: choose action with exploration vs exploitation
-             if random.uniform(0, 1) < epsilon:
-                 action = env.action_space.sample()  # Explore: Random action
-             else:
-                 action = np.argmax(Q_table[state])  # Exploit: Best action from Q-table
-
-             # Take the action and observe the new state and reward
-             next_state, reward, terminated, truncated, info = env.step(action)
-             done = terminated or truncated
-             total_reward += reward
-
-             # Update the Q-value for the current state-action pair
-             old_q_value = Q_table[state, action]
-             future_q_value = np.max(Q_table[next_state])
-             new_q_value = old_q_value + learning_rate * (reward + discount_factor * future_q_value - old_q_value)
-             Q_table[state, action] = new_q_value
-
-             # Update state
-             state = next_state
-
-         # Decay epsilon to reduce exploration over time
-         epsilon = max(min_epsilon, epsilon * epsilon_decay)
-
-         # Display progress every 1000 episodes
-         if episode % 1000 == 0:
-             print(f"Episode {episode}/{num_episodes}, Epsilon: {epsilon:.4f}, Total reward: {total_reward}")
-
-     return Q_table
-
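- # After training, the greedy policy is just the row-wise argmax of the Q-table,
- # i.e. np.argmax(Q_table, axis=1) gives the chosen action for every state.
-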
- # Define a function to run the agent after training
- def run_agent():
-     state, _ = env.reset()
-     total_reward = 0
-     done = False
-     episode_steps = []
-
-     while not done:
-         # Choose the best action from the Q-table (exploitation)
-         action = np.argmax(Q_table[state])
-         next_state, reward, terminated, truncated, info = env.step(action)
-         done = terminated or truncated
-         total_reward += reward
-         episode_steps.append((state, action, reward))
-
-         # Update state
-         state = next_state
-
-         # Pause to allow visualization (optional)
-         time.sleep(0.1)
-
-     return total_reward, episode_steps
-
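- # FrozenLake only rewards reaching the goal (+1), so total_reward is 1.0 on a
- # successful episode and 0.0 otherwise.
-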
- # Gradio interface for running the agent
- def gradio_interface():
-     # First, train the agent
-     train_agent()
-
-     # Now run the agent and show results
-     total_reward, steps = run_agent()
-     print("Total Reward from the episode:", total_reward)
-
-     # Generate a text summary of the run
-     return f"Total reward in the final episode: {total_reward}. The agent took {len(steps)} steps."
-
- # Set up Gradio Interface
- iface = gr.Interface(fn=gradio_interface,
-                      inputs=[],  # No inputs required for now
-                      outputs="text",
-                      title="Reinforcement Learning Agent on FrozenLake",
-                      description="This app runs a trained reinforcement learning agent on the FrozenLake-v1 environment. The agent is trained using Q-learning and will try to navigate the lake to reach the goal.")
-
- if __name__ == "__main__":
-     iface.launch()