added dp agent.
- scripts/dpAgent.py +99 -0
- scripts/requirements.txt +4 -0
- scripts/stationaryGrid.py +85 -0
scripts/dpAgent.py
ADDED
@@ -0,0 +1,99 @@
import numpy as np
import enum
from matplotlib import pyplot as plt
from tqdm import trange
# numba is not listed in requirements.txt; the import is only needed if the
# @njit decorator below is re-enabled.
# from numba import njit, prange

from stationaryGrid import StationaryGrid


class Action(enum.Enum):
    UP = 0
    DOWN = 1
    LEFT = 2
    RIGHT = 3


class DP:
    def __init__(self, grid):
        self.grid = grid
        self.size = len(grid)
        self.V = np.zeros((self.size, self.size))
        self.gamma = 0.9
        self.actions = [Action.UP, Action.DOWN, Action.LEFT, Action.RIGHT]

    def rewardFunc(self, state, action):
        """Returns (next state, reward) for taking `action` in `state`.
        Moves off the grid or into a wall keep the agent in place and cost -1;
        reaching the goal at (0, 0) pays +10; every other move is free."""
        if action == Action.UP:
            finalPos = (state[0] - 1, state[1])
        elif action == Action.DOWN:
            finalPos = (state[0] + 1, state[1])
        elif action == Action.LEFT:
            finalPos = (state[0], state[1] - 1)
        elif action == Action.RIGHT:
            finalPos = (state[0], state[1] + 1)
        else:
            raise ValueError("Invalid action")

        if finalPos[0] < 0 or finalPos[0] >= self.size or finalPos[1] < 0 or finalPos[1] >= self.size:
            return state, -1
        elif self.grid[finalPos[0], finalPos[1]] == 0:
            return state, -1
        elif finalPos[0] == 0 and finalPos[1] == 0:
            return finalPos, 10

        return finalPos, 0

    # @njit(parallel=True)
    def run(self, num_iterations):
        """Iterative policy evaluation of the equiprobable random policy:
        each sweep replaces V(s) with the average over the four actions of
        reward + gamma * V(next state)."""
        for it in trange(num_iterations):
            V_copy = np.copy(self.V)
            for state in np.ndindex(*self.grid.shape):
                weighted_rewards = 0
                for action in self.actions:
                    finalPosition, reward = self.rewardFunc(state, action)
                    weighted_rewards += (1 / len(self.actions)) * (
                        reward + (self.gamma * self.V[finalPosition[0], finalPosition[1]]))
                V_copy[state[0], state[1]] = weighted_rewards
            self.V = V_copy

            # plt.imshow(self.V)
            # plt.savefig(f'imgs/{it}.png')
            # print(it)

    def policy(self, state):
        """
        The DP policy is to take the action that maximizes the value function,
        i.e. the one-step return reward + gamma * V(next state).
        This returns the best action, the resulting position, and that return.
        """
        best_return = -np.inf
        best = None
        bestPos = None
        for action in self.actions:
            finalPosition, reward = self.rewardFunc(state, action)
            value = reward + self.gamma * self.V[finalPosition[0], finalPosition[1]]
            if value > best_return:
                best_return = value
                best = action
                bestPos = finalPosition
        return best, bestPos, best_return

    def find_path(self):
        """Greedily follows policy() from the bottom-right corner to the goal at (0, 0)."""
        path = []
        cur = (self.size - 1, self.size - 1)
        path.append(cur)
        while cur != (0, 0):
            _, cur, _ = self.policy(cur)
            if cur in path:
                print(path, cur)
                raise ValueError("Infinite loop")
            path.append(cur)
        print(path)
        return path


if __name__ == "__main__":
    grid = StationaryGrid(4, size=20)
    grid.create_grid()
    dp = DP(grid.grid)
    dp.run(10000)
    dp.find_path()
    plt.imshow(dp.V)
    plt.savefig(f'imgs/{0}.png')
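For illustration only (not part of the committed script): a minimal sketch of how the pieces could be combined to visualise the greedy path on the obstacle grid. It assumes `grid` and `dp` were built as in the `__main__` block above, that `find_path()` returns the list of (row, col) states it visits, and that the `imgs/` directory exists; the overlay code and output filename are assumptions.

    # Sketch: overlay the greedy path extracted by find_path() on the grid.
    path = dp.find_path()
    rows = [p[0] for p in path]
    cols = [p[1] for p in path]
    plt.figure()
    plt.imshow(grid.grid, cmap='gray')   # walls are 0 (black), free cells 1 (white)
    plt.plot(cols, rows, color='red')    # imshow puts grid columns on the x-axis
    plt.scatter([cols[0], cols[-1]], [rows[0], rows[-1]], c=['green', 'red'])
    plt.savefig('imgs/path_overlay.png')  # hypothetical output name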
scripts/requirements.txt
ADDED
@@ -0,0 +1,4 @@
numpy
astar
matplotlib
tqdm
scripts/stationaryGrid.py
ADDED
@@ -0,0 +1,85 @@
import numpy as np
import random
from matplotlib import pyplot as plt
from astar import AStar
import pickle
import enum


class MazeSolver(AStar):
    """
    Because I'm too lazy to implement A-star, this class is yoinked
    from https://github.com/jrialland/python-astar/blob/f11311b678522d90c1786e6b8d9393095a0b733f/tests/maze/test_maze.py#L58

    Sample use of the astar algorithm. In this example we work on a maze made of ascii characters,
    and a 'node' is just an (x, y) tuple that represents a reachable position
    """

    def __init__(self, maze):
        self.world = maze
        self.size = maze.shape[0]

    def heuristic_cost_estimate(self, n1, n2):
        """computes the 'direct' distance between two (x, y) tuples"""
        (x1, y1) = n1
        (x2, y2) = n2
        return np.hypot(x2 - x1, y2 - y1)

    def distance_between(self, n1, n2):
        """this method always returns 1, as two 'neighbors' are always adjacent"""
        return 1

    def neighbors(self, node):
        """for a given coordinate in the maze, returns up to 4 adjacent (north, east, south, west)
        nodes that can be reached (= any adjacent coordinate that is not a wall);
        nodes are (x, y) while the grid is indexed [y, x], as in the original example
        """
        x, y = node
        return [(nx, ny) for nx, ny in [(x, y - 1), (x, y + 1), (x - 1, y), (x + 1, y)]
                if 0 <= nx < self.size and 0 <= ny < self.size and self.world[ny, nx] == 1]


class StationaryGrid:
    def __init__(self, seed, size=20):
        np.random.seed(seed)
        random.seed(seed)
        self.size = size
        # 1 = free cell, 0 = obstacle
        self.grid = np.ones((size, size), dtype=np.uint8)

    def create_grid(self):
        n_obstacles = np.random.randint(1, 10)
        i = 0
        while i < n_obstacles:
            # for i in range(n_obstacles):
            x = np.random.randint(0, self.grid.shape[0])
            y = np.random.randint(0, self.grid.shape[1])
            size = np.random.randint(2, high=self.size // 2, size=(2,))
            if x == 0 and y == 0:
                continue
            if (x + size[0]) >= self.grid.shape[0] and (y + size[1]) >= self.grid.shape[1]:
                continue

            start = (0, 0)
            goal = (self.grid.shape[0] - 2, self.grid.shape[0] - 2)
            self.grid[x:x + size[0], y:y + size[1]] = 0
            # make sure there's still a path to the goal
            path = MazeSolver(self.grid).astar(start, goal)
            if path is None:
                # if not, undo the current obstacle and generate another random one
                self.grid[x:x + size[0], y:y + size[1]] = 1
                continue

            i += 1

    def plot(self, pth=None):
        plt.imshow(self.grid, cmap='gray')
        plt.plot(0, 0, marker='o', markersize=10, color="red")
        plt.plot(self.grid.shape[0] - 1, self.grid.shape[1] - 1, marker='o', markersize=10, color="green")
        if pth is not None:
            plt.savefig(pth)
        else:
            plt.show()


if __name__ == '__main__':
    for i in range(100):
        grid = StationaryGrid(i, size=100)
        grid.create_grid()
        grid.plot(f'imgs/{i}.png')
        print(i)
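As a quick sanity check (again, not part of the commit), MazeSolver can also be used on its own to visualise the A* path that create_grid() relies on for its reachability test. The snippet below is a sketch; the output filename is an assumption and the imgs/ directory is assumed to exist.

    # Sketch: run the same A* query that create_grid() uses and plot the result.
    grid = StationaryGrid(4, size=20)
    grid.create_grid()
    solver = MazeSolver(grid.grid)
    # MazeSolver nodes are (x, y); node x is the grid column, which is the plot's x-axis.
    path = solver.astar((0, 0), (grid.size - 2, grid.size - 2))
    if path is not None:
        xs, ys = zip(*path)
        plt.imshow(grid.grid, cmap='gray')
        plt.plot(xs, ys, color='blue')
        plt.savefig('imgs/astar_check.png')  # hypothetical output name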