Upload 8 files
- callbacks.py +32 -0
- compile_video.py +16 -0
- evaluate.py +41 -0
- push_to_hub.py +39 -0
- requirements.txt +8 -0
- sandtris.py +123 -0
- tetris_env.py +210 -0
- train.py +42 -0
callbacks.py
ADDED
@@ -0,0 +1,32 @@
import os
from stable_baselines3.common.callbacks import BaseCallback
import imageio
import numpy as np

class SaveFramesCallback(BaseCallback):
    """
    Callback for saving frames during training.
    """
    def __init__(self, save_freq, save_path, verbose=0):
        super(SaveFramesCallback, self).__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path
        self.frames = []
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.num_timesteps % self.save_freq == 0:
            # Render the environment and get the RGB array
            frame = self.training_env.render(mode='rgb_array')
            self.frames.append(frame)
            if self.verbose > 0:
                print(f"Saved frame at timestep {self.num_timesteps}")
        return True

    def _on_training_end(self) -> None:
        # Save the frames as a GIF
        if self.frames:
            gif_path = os.path.join(self.save_path, "training.gif")
            imageio.mimsave(gif_path, self.frames, fps=10)
            if self.verbose > 0:
                print(f"Saved training GIF to {gif_path}")

compile_video.py
ADDED
@@ -0,0 +1,16 @@
import imageio
import os

def compile_gif_to_video(gif_path, video_path):
    reader = imageio.get_reader(gif_path)
    fps = 10
    # Note: writing MP4 output with imageio typically requires the imageio-ffmpeg backend
    writer = imageio.get_writer(video_path, fps=fps)
    for frame in reader:
        writer.append_data(frame)
    writer.close()
    print(f"Video saved to {video_path}")

if __name__ == "__main__":
    gif_path = "models/frames/training.gif"
    video_path = "models/training.mp4"
    compile_gif_to_video(gif_path, video_path)

evaluate.py
ADDED
@@ -0,0 +1,41 @@
import gym
from stable_baselines3 import DQN
from tetris_env import TetrisEnv
import pygame
import time

def main():
    # Create the environment
    env = TetrisEnv()

    # Load the trained model
    model = DQN.load("models/dqn_tetris")

    # Number of evaluation episodes
    episodes = 5

    for ep in range(1, episodes + 1):
        obs = env.reset()
        done = False
        total_reward = 0
        while not done:
            # Render the game (optional)
            env.render(mode='human')

            # Predict the action using the trained model
            action, _states = model.predict(obs, deterministic=True)

            # Take the action in the environment
            obs, reward, done, info = env.step(action)

            total_reward += reward

            # Control the rendering speed
            pygame.time.wait(100)  # Wait 100 ms between steps

        print(f"Episode {ep}: Total Reward = {total_reward}")

    env.close()

if __name__ == "__main__":
    main()

push_to_hub.py
ADDED
@@ -0,0 +1,39 @@
from stable_baselines3 import DQN
from huggingface_hub import HfApi, HfFolder, Repository
import os

def main():
    # Define repository details
    repo_name = "dqn-tetris"
    model_path = "models/dqn_tetris.zip"
    model_dir = "models"

    # Load the trained model
    model = DQN.load(model_path)

    # Initialize Hugging Face API
    api = HfApi()
    user = api.whoami()["name"]

    # Create the repository if it doesn't exist
    try:
        api.create_repo(repo_id=repo_name, repo_type="model", exist_ok=True)
        print(f"Repository '{repo_name}' created.")
    except Exception as e:
        print(f"Repository '{repo_name}' already exists or failed to create: {e}")

    # Clone the repository locally
    repo = Repository(local_dir=repo_name, clone_from=f"{user}/{repo_name}", use_auth_token=True)

    # Move the model file into the repository directory
    os.makedirs(repo_name, exist_ok=True)
    os.rename(model_path, os.path.join(repo_name, "dqn_tetris.zip"))

    # Add and commit the model
    repo.git_add(auto_lfs_track=True)
    repo.git_commit("Add trained DQN Tetris model")
    repo.git_push()
    print(f"Model pushed to Hugging Face Hub at {user}/{repo_name}")

if __name__ == "__main__":
    main()

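For comparison, the same upload can be done without a local git clone; the following is a minimal sketch, assuming a reasonably recent huggingface_hub, reusing the repo and file names from push_to_hub.py above:

from huggingface_hub import HfApi

api = HfApi()
user = api.whoami()["name"]
repo_id = f"{user}/dqn-tetris"

# Create the repo if it does not exist, then upload the zipped SB3 model directly
api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
api.upload_file(
    path_or_fileobj="models/dqn_tetris.zip",
    path_in_repo="dqn_tetris.zip",
    repo_id=repo_id,
    repo_type="model",
)
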
requirements.txt
ADDED
@@ -0,0 +1,8 @@
pygame
gym
stable-baselines3
numpy
torch
huggingface-hub
imageio
gradio

sandtris.py
ADDED
@@ -0,0 +1,123 @@
import pygame
import random

colors = [
    (0, 0, 0),
    (120, 37, 179),
    (100, 179, 179),
    (80, 34, 22),
    (80, 134, 22),
    (180, 34, 22),
    (180, 34, 122),
]

class Figure:
    x = 0
    y = 0

    figures = [
        [[1, 5, 9, 13], [4, 5, 6, 7]],
        [[4, 5, 9, 10], [2, 6, 5, 9]],
        [[6, 7, 9, 10], [1, 5, 6, 10]],
        [[1, 2, 5, 9], [0, 4, 5, 6], [1, 5, 9, 8], [4, 5, 6, 10]],
        [[1, 2, 6, 10], [5, 6, 7, 9], [2, 6, 10, 11], [3, 5, 6, 7]],
        [[1, 4, 5, 6], [1, 4, 5, 9], [4, 5, 6, 9], [1, 5, 6, 9]],
        [[1, 2, 5, 6]],
    ]

    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.type = random.randint(0, len(self.figures) - 1)
        self.color = random.randint(1, len(colors) - 1)
        self.rotation = 0

    def image(self):
        return self.figures[self.type][self.rotation]

    def rotate(self):
        self.rotation = (self.rotation + 1) % len(self.figures[self.type])

class Tetris:
    def __init__(self, height, width):
        self.level = 2
        self.score = 0
        self.state = "start"
        self.field = []
        self.height = height
        self.width = width
        self.x = 100
        self.y = 60
        self.zoom = 20
        self.figure = None

        for i in range(height):
            new_line = []
            for j in range(width):
                new_line.append(0)
            self.field.append(new_line)

    def new_figure(self):
        self.figure = Figure(3, 0)

    def intersects(self):
        intersection = False
        for i in range(4):
            for j in range(4):
                if i * 4 + j in self.figure.image():
                    if (
                        i + self.figure.y > self.height - 1
                        or j + self.figure.x > self.width - 1
                        or j + self.figure.x < 0
                        or self.field[i + self.figure.y][j + self.figure.x] > 0
                    ):
                        intersection = True
        return intersection

    def break_lines(self):
        lines = 0
        for i in range(1, self.height):
            zeros = 0
            for j in range(self.width):
                if self.field[i][j] == 0:
                    zeros += 1
            if zeros == 0:
                lines += 1
                for i1 in range(i, 1, -1):
                    for j in range(self.width):
                        self.field[i1][j] = self.field[i1 - 1][j]
        self.score += lines ** 2

    def go_space(self):
        while not self.intersects():
            self.figure.y += 1
        self.figure.y -= 1
        self.freeze()

    def go_down(self):
        self.figure.y += 1
        if self.intersects():
            self.figure.y -= 1
            self.freeze()

    def freeze(self):
        for i in range(4):
            for j in range(4):
                if i * 4 + j in self.figure.image():
                    self.field[i + self.figure.y][j + self.figure.x] = self.figure.color
        self.break_lines()
        self.new_figure()
        if self.intersects():
            self.state = "gameover"

    def go_side(self, dx):
        old_x = self.figure.x
        self.figure.x += dx
        if self.intersects():
            self.figure.x = old_x

    def rotate(self):
        old_rotation = self.figure.rotation
        self.figure.rotate()
        if self.intersects():
            self.figure.rotation = old_rotation

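As a quick sanity check of the game logic, the Tetris class above can be driven directly, without pygame rendering or the Gym wrapper. A minimal sketch (random horizontal nudges followed by a hard drop per piece):

import random
from sandtris import Tetris

game = Tetris(20, 10)
game.new_figure()
while game.state != "gameover":
    game.go_side(random.randint(-1, 1))  # nudge the piece left or right
    game.go_space()                      # hard drop; freeze() spawns the next piece
print("Final score:", game.score)
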
tetris_env.py
ADDED
@@ -0,0 +1,210 @@
import gym
from gym import spaces
import numpy as np
import pygame
import random
from sandtris import Tetris  # Ensure sandtris.py is in the same directory
import os

colors = [
    (0, 0, 0),
    (120, 37, 179),
    (100, 179, 179),
    (80, 34, 22),
    (80, 134, 22),
    (180, 34, 22),
    (180, 34, 122),
]

class TetrisEnv(gym.Env):
    """
    Custom environment for the Tetris game, compatible with OpenAI Gym
    """
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self):
        super(TetrisEnv, self).__init__()

        # Define action space: 0=left, 1=right, 2=rotate, 3=drop, 4=noop
        self.action_space = spaces.Discrete(5)

        # Observation space: 2D grid representing the game board
        self.height = 20
        self.width = 10
        self.observation_space = spaces.Box(low=0, high=6,
                                            shape=(self.height, self.width), dtype=np.int32)

        # Initialize the game
        self.game = Tetris(self.height, self.width)

        # Setup for rendering
        self.screen = None
        self.zoom = 20
        self.x = 100
        self.y = 60

    def reset(self):
        """
        Reset the game to its initial state
        """
        self.game = Tetris(self.height, self.width)
        self.game.new_figure()
        return self._get_obs()

    def step(self, action):
        """
        Execute one time step within the environment
        """
        done = False
        reward = 0

        # Apply action
        if action == 0:
            self.game.go_side(-1)  # Move left
        elif action == 1:
            self.game.go_side(1)  # Move right
        elif action == 2:
            self.game.rotate()  # Rotate
        elif action == 3:
            self.game.go_space()  # Drop
        elif action == 4:
            pass  # No operation

        # Move the piece down automatically
        self.game.go_down()

        # Calculate reward
        lines_cleared = self.game.score  # Assuming score increments with lines cleared
        reward += lines_cleared * 10

        # Additional reward shaping
        aggregate_height = self.calculate_aggregate_height()
        holes = self.calculate_holes()
        bumpiness = self.calculate_bumpiness()

        reward -= (aggregate_height * 0.5 + holes * 0.7 + bumpiness * 0.3)

        if self.game.state == "gameover":
            done = True
            reward -= 10  # Penalty for losing

        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """
        Get the current state of the game as an observation
        """
        return np.array(self.game.field)

    def render(self, mode='human'):
        """
        Render the game state as an RGB array or display it on screen
        """
        if mode == 'rgb_array':
            if self.screen is None:
                pygame.init()
                size = (self.x * 2 + self.zoom * self.width, self.y * 2 + self.zoom * self.height)
                self.screen = pygame.Surface(size)

            self.screen.fill((173, 216, 230))  # Light blue background

            # Draw the game field
            for i in range(self.game.height):
                for j in range(self.game.width):
                    rect = pygame.Rect(self.x + self.zoom * j, self.y + self.zoom * i, self.zoom, self.zoom)
                    pygame.draw.rect(self.screen, (128, 128, 128), rect, 1)  # Grid lines
                    if self.game.field[i][j] > 0:
                        pygame.draw.rect(self.screen,
                                         colors[self.game.field[i][j]],
                                         rect.inflate(-2, -2))

            # Draw the current figure
            if self.game.figure is not None:
                for i in range(4):
                    for j in range(4):
                        p = i * 4 + j
                        if p in self.game.figure.image():
                            rect = pygame.Rect(self.x + self.zoom * (j + self.game.figure.x),
                                               self.y + self.zoom * (i + self.game.figure.y),
                                               self.zoom, self.zoom)
                            pygame.draw.rect(self.screen,
                                             colors[self.game.figure.color],
                                             rect.inflate(-2, -2))

            # Convert the Pygame surface to an RGB array
            return pygame.surfarray.array3d(self.screen)

        elif mode == 'human':
            if self.screen is None:
                pygame.init()
                size = (self.x * 2 + self.zoom * self.width, self.y * 2 + self.zoom * self.height)
                self.screen = pygame.display.set_mode(size)
                pygame.display.set_caption("Tetris RL")

            self.screen.fill((173, 216, 230))  # Light blue background

            # Draw the game field
            for i in range(self.game.height):
                for j in range(self.game.width):
                    rect = pygame.Rect(self.x + self.zoom * j, self.y + self.zoom * i, self.zoom, self.zoom)
                    pygame.draw.rect(self.screen, (128, 128, 128), rect, 1)  # Grid lines
                    if self.game.field[i][j] > 0:
                        pygame.draw.rect(self.screen,
                                         colors[self.game.field[i][j]],
                                         rect.inflate(-2, -2))

            # Draw the current figure
            if self.game.figure is not None:
                for i in range(4):
                    for j in range(4):
                        p = i * 4 + j
                        if p in self.game.figure.image():
                            rect = pygame.Rect(self.x + self.zoom * (j + self.game.figure.x),
                                               self.y + self.zoom * (i + self.game.figure.y),
                                               self.zoom, self.zoom)
                            pygame.draw.rect(self.screen,
                                             colors[self.game.figure.color],
                                             rect.inflate(-2, -2))

            pygame.display.flip()

    def close(self):
        """
        Clean up resources
        """
        if self.screen is not None:
            pygame.display.quit()
            pygame.quit()

    # Reward shaping helper functions
    def calculate_aggregate_height(self):
        heights = [0 for _ in range(self.width)]
        for j in range(self.width):
            for i in range(self.height):
                if self.game.field[i][j] != 0:
                    heights[j] = self.height - i
                    break
        return sum(heights)

    def calculate_holes(self):
        holes = 0
        for j in range(self.width):
            block_found = False
            for i in range(self.height):
                if self.game.field[i][j] != 0:
                    block_found = True
                elif block_found and self.game.field[i][j] == 0:
                    holes += 1
        return holes

    def calculate_bumpiness(self):
        heights = [0 for _ in range(self.width)]
        for j in range(self.width):
            for i in range(self.height):
                if self.game.field[i][j] != 0:
                    heights[j] = self.height - i
                    break
        bumpiness = 0
        for j in range(self.width - 1):
            bumpiness += abs(heights[j] - heights[j + 1])
        return bumpiness

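Before training, the environment can be smoke-tested with random actions using the classic Gym loop that the files above rely on. A minimal sketch:

from tetris_env import TetrisEnv

env = TetrisEnv()
obs = env.reset()
done = False
total_reward = 0
while not done:
    action = env.action_space.sample()  # random action: left/right/rotate/drop/noop
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("Random-policy episode reward:", total_reward)
env.close()
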
train.py
ADDED
@@ -0,0 +1,42 @@
import gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from tetris_env import TetrisEnv
from callbacks import SaveFramesCallback
import os

def main():
    # Create the environment
    env = TetrisEnv()

    # Initialize the RL model (DQN)
    model = DQN('MlpPolicy', env, verbose=1,
                learning_rate=1e-3,
                buffer_size=50000,
                learning_starts=1000,
                batch_size=32,
                gamma=0.99,
                target_update_interval=1000,
                exploration_fraction=0.1,
                exploration_final_eps=0.02)

    # Define the number of training timesteps
    TIMESTEPS = 100000  # Adjust as needed

    # Initialize the callback
    callback = SaveFramesCallback(save_freq=5000, save_path="models/frames", verbose=1)

    # Train the model with the callback
    model.learn(total_timesteps=TIMESTEPS, callback=callback)

    # Save the model
    os.makedirs("models", exist_ok=True)
    model.save("models/dqn_tetris")
    print("Model saved to models/dqn_tetris.zip")

    # Evaluate the trained agent
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    print(f"Mean Reward: {mean_reward} +/- {std_reward}")

if __name__ == "__main__":
    main()