Spaces:
Runtime error
Delete app.py
app.py
DELETED
@@ -1,105 +0,0 @@
-import gradio as gr
-import gym
-import numpy as np
-import random
-import time
-
-# Define the environment
-env = gym.make('FrozenLake-v1', is_slippery=False)
-
-# Initialize Q-table and hyperparameters
-state_space_size = env.observation_space.n
-action_space_size = env.action_space.n
-
-# Initialize a Q-table with zeros
-Q_table = np.zeros((state_space_size, action_space_size))
-
-# Define training parameters
-num_episodes = 10000
-learning_rate = 0.1
-discount_factor = 0.9
-epsilon = 0.95
-epsilon_decay = 0.995
-min_epsilon = 0.01
-
-# Train the Q-learning agent
-def train_agent():
-    global epsilon, Q_table
-
-    for episode in range(num_episodes):
-        state = env.reset()
-        total_reward = 0
-        done = False
-
-        while not done:
-            # Epsilon-greedy policy: choose action with exploration vs exploitation
-            if random.uniform(0, 1) < epsilon:
-                action = env.action_space.sample()  # Explore: Random action
-            else:
-                action = np.argmax(Q_table[state])  # Exploit: Best action from Q-table
-
-            # Take the action and observe the new state and reward
-            next_state, reward, done, _, info = env.step(action)
-            total_reward += reward
-
-            # Update the Q-value for the current state-action pair
-            old_q_value = Q_table[state, action]
-            future_q_value = np.max(Q_table[next_state])
-            new_q_value = old_q_value + learning_rate * (reward + discount_factor * future_q_value - old_q_value)
-            Q_table[state, action] = new_q_value
-
-            # Update state
-            state = next_state
-
-        # Decay epsilon to reduce exploration over time
-        epsilon = max(min_epsilon, epsilon * epsilon_decay)
-
-        # Display progress every 1000 episodes
-        if episode % 1000 == 0:
-            print(f"Episode {episode}/{num_episodes}, Epsilon: {epsilon:.4f}, Total reward: {total_reward}")
-
-    return Q_table
-
-# Define a function to run the agent after training
-def run_agent():
-    state = env.reset()
-    total_reward = 0
-    done = False
-    episode_steps = []
-
-    while not done:
-        # Choose the best action from the Q-table (exploitation)
-        action = np.argmax(Q_table[state])
-        next_state, reward, done, _, info = env.step(action)
-        total_reward += reward
-        episode_steps.append((state, action, reward))
-
-        # Update state
-        state = next_state
-
-        # Pause to allow visualization (optional)
-        time.sleep(0.1)
-
-    return total_reward, episode_steps
-
-# Gradio interface for running the agent
-def gradio_interface():
-    # First, train the agent
-    train_agent()
-
-    # Now run the agent and show results
-    total_reward, steps = run_agent()
-    print("Total Reward from the episode:", total_reward)
-
-    # Generate a visual output
-    return f"Total reward in the final episode: {total_reward}. The agent took {len(steps)} steps."
-
-# Set up Gradio Interface
-iface = gr.Interface(fn=gradio_interface,
-                     inputs=[],  # No inputs required for now
-                     outputs="text",
-                     title="Reinforcement Learning Agent on FrozenLake",
-                     description="This app runs a trained reinforcement learning agent on the FrozenLake-v1 environment. The agent is trained using Q-learning and will try to navigate the lake to reach the goal.")
-
-if __name__ == "__main__":
-    iface.launch()
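The "Runtime error" badge at the top of the page is consistent with a Gym API mismatch in the deleted file, although the Space's logs are not shown here, so this is an inference rather than a confirmed diagnosis. env.step() is unpacked into five values, which is the convention introduced in Gym 0.26 (and kept by Gymnasium), but env.reset() is assigned to a single state variable even though that same API returns an (observation, info) tuple, so the first Q_table[state] lookup would fail in the very first episode. A minimal sketch of the episode loop under that API, with a random policy standing in for the Q-table lookup, looks like this (the Gym version is an assumption, not something recorded in this commit):

import gym

# Assumes Gym >= 0.26 (or Gymnasium), where reset() returns (obs, info)
# and step() returns (obs, reward, terminated, truncated, info).
env = gym.make('FrozenLake-v1', is_slippery=False)

state, _ = env.reset()                  # unpack the (observation, info) tuple
done = False
while not done:
    action = env.action_space.sample()  # placeholder policy for the sketch
    next_state, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated      # stop on goal/hole or on the time limit
    state = next_state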
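Separately, redeploying a fixed version of this app would require the Space to declare its Python dependencies. Judging only from the imports in the deleted file (the repository's actual requirements.txt, if it had one, is not part of this commit), a minimal requirements listing would be:

gradio
gym
numpy

random and time come from the Python standard library and need no entry.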