Spaces:

c-gohlke
/

LitRL-Inference

Build error

App Files Files Community

c-gohlke committed on Feb 1, 2024

Commit

302ae2f

verified ·

1 Parent(s): 6033379

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

src/app.py +7 -39
src/app_state.py +56 -28
src/typing.py +14 -1

src/app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import sys
 from pathlib import Path
-from typing import Any, Generator, List
 if sys.version_info[:2] >= (3, 11):
     from typing import Annotated
@@ -13,16 +13,14 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from gymnasium.wrappers.record_video import RecordVideo
 from loguru import logger
-from pydantic import BaseModel
 from litrl.algo.mcts.agent import MCTSAgent
 from litrl.common.agent import RandomAgent
-from litrl.env.connect_four import Board, ConnectFour
 from litrl.env.make import make
 from litrl.env.typing import GymId
 from src.app_state import AppState
 from src.huggingface.huggingface_client import HuggingFaceClient
-from src.typing import CpuConfig
 def stream_mp4(mp4_path: Path) -> StreamingResponse:
@@ -33,35 +31,10 @@ def stream_mp4(mp4_path: Path) -> StreamingResponse:
     return StreamingResponse(content=iter_file(), media_type="video/mp4")
-ObservationType = List[Board]
-class GridResponseType(BaseModel):
-    grid: ObservationType
-    done: bool
-class BotResponseType(GridResponseType):
-    action: int
 def get_app_state() -> AppState:
     return AppState()
-def step(env: ConnectFour, action: int) -> GridResponseType:
-    env.step(action)
-    return observe(env)
-def observe(env: ConnectFour) -> GridResponseType:
-    obs = env.observe("player_1")
-    return GridResponseType(
-        grid=obs["observation"].tolist(),
-        done=bool(env.terminations[env.agent_selection] or env.truncations[env.agent_selection]),  # TODO why needed?
-    )
 def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
     app = FastAPI()
@@ -74,15 +47,13 @@ def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
         action: int,
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
-        response = step(app_state.env, action)
-        app_state.inform_action(action=action)
-        return response
     @app.get(path="/connect_four/observe", response_model=GridResponseType)
     def endpoint_observe(
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
-        return observe(app_state.env)
     @app.post(path="/connect_four/bot_play", response_model=BotResponseType)
     def endpoint_bot_play(
@@ -91,8 +62,7 @@ def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
     ) -> BotResponseType:
         app_state.set_config(cpu_config)
         action = app_state.get_action()
-        response = step(app_state.env, action)
-        app_state.inform_action(action=action)
         return BotResponseType(
             grid=response.grid,
             done=response.done,
@@ -107,7 +77,7 @@ def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
             if app_state.cpu_config.simulations is None:
                 raise ValueError
             if app_state.agent.mcts is None:
-                raise ValueError
             return float(
                 app_state.agent.mcts.root.visits / app_state.cpu_config.simulations,
             )  # TODO why not recognized as float?
@@ -117,9 +87,7 @@ def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
     def endpoint_reset(
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
-        app_state.env.reset()
-        app_state.inform_reset()
-        return observe(app_state.env)
     @app.get(path="/get_huggingface_video")
     def endpoint_get_huggingface_video(

 import sys
 from pathlib import Path
+from typing import Any, Generator
 if sys.version_info[:2] >= (3, 11):
     from typing import Annotated
 from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from gymnasium.wrappers.record_video import RecordVideo
 from loguru import logger
 from litrl.algo.mcts.agent import MCTSAgent
 from litrl.common.agent import RandomAgent
 from litrl.env.make import make
 from litrl.env.typing import GymId
 from src.app_state import AppState
 from src.huggingface.huggingface_client import HuggingFaceClient
+from src.typing import BotResponseType, CpuConfig, GridResponseType
 def stream_mp4(mp4_path: Path) -> StreamingResponse:
     return StreamingResponse(content=iter_file(), media_type="video/mp4")
 def get_app_state() -> AppState:
     return AppState()
 def create_app() -> FastAPI:  # noqa: C901 # TODO move to routes
     app = FastAPI()
         action: int,
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
+        return app_state.step(action)
     @app.get(path="/connect_four/observe", response_model=GridResponseType)
     def endpoint_observe(
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
+        return app_state.observe()
     @app.post(path="/connect_four/bot_play", response_model=BotResponseType)
     def endpoint_bot_play(
     ) -> BotResponseType:
         app_state.set_config(cpu_config)
         action = app_state.get_action()
+        response = app_state.step(action)
         return BotResponseType(
             grid=response.grid,
             done=response.done,
             if app_state.cpu_config.simulations is None:
                 raise ValueError
             if app_state.agent.mcts is None:
+                return 1.0
             return float(
                 app_state.agent.mcts.root.visits / app_state.cpu_config.simulations,
             )  # TODO why not recognized as float?
     def endpoint_reset(
         app_state: Annotated[AppState, Depends(dependency=get_app_state)],
     ) -> GridResponseType:
+        return app_state.reset()
     @app.get(path="/get_huggingface_video")
     def endpoint_get_huggingface_video(

src/app_state.py CHANGED Viewed

@@ -21,23 +21,22 @@ from litrl.algo.mcts.mcts_config import MCTSConfigBuilder
 from litrl.algo.mcts.rollout import VanillaRollout
 from litrl.common.agent import Agent, RandomMultiAgent
 from litrl.model.sac.multi_agent import OnnxSacDeterministicMultiAgent
-from src.typing import AgentType, CpuConfig, RolloutPolicy
 class AppState:
     _instance: Self | None = None
     env: ConnectFour
     cpu_config: CpuConfig
-    agent: Agent[Any, int]
     def setup(self) -> None:
         logger.debug("AppState setup called")
         self.env = make_multiagent(id="connect_four", render_mode="rgb_array")
         self.env.reset(seed=123)
-        self.cpu_config: CpuConfig = CpuConfig(agent_type=AgentType.RANDOM)
-        self.set_agent()  # TODO in properties setter.
-        self.agent: Agent[Any, Any]
     def __new__(cls: type[AppState]) -> AppState:  # noqa: PYI034
         if cls._instance is None:
@@ -48,44 +47,73 @@ class AppState:
     def set_config(self, cpu_config: CpuConfig) -> None:
         logger.info(f"new cpu_config: {cpu_config}")
         if cpu_config != self.cpu_config:
             self.cpu_config = cpu_config
-            self.set_agent()
         else:
             logger.info("cpu_config unchanged")
-    def create_rollout(self) -> Agent[Any, Any]:
-        if self.cpu_config.rollout_policy == RolloutPolicy.SAC:
             return OnnxSacDeterministicMultiAgent()
         return RandomMultiAgent(np.random.default_rng(seed=123))
-    def set_agent(self) -> None:
-        if self.cpu_config.agent_type.value == AgentType.MCTS.value:
-            rollout_agent = self.create_rollout()
-            # fmt: off
-            mcts_config = (
-                MCTSConfigBuilder()
-                .set_simulations(self.cpu_config.simulations or 50)
-                .set_rollout_strategy(VanillaRollout(rollout_agent=rollout_agent))
-            ).build()
-            # fmt: on
-            self.agent = MCTSAgent(cfg=mcts_config)
-            logger.debug("set_agent: MCTSAgent")
-        elif self.cpu_config.agent_type.value == AgentType.RANDOM.value:
             self.agent = RandomMultiAgent()
-        elif self.cpu_config.agent_type.value == AgentType.SAC.value:
             self.agent = OnnxSacDeterministicMultiAgent()  # type: ignore[assignment]  # TODO
         else:
-            msg = f"cpu_config.name: {self.cpu_config.agent_type}"
             raise NotImplementedError(msg)
     def get_action(self) -> int:
         return self.agent.get_action(env=self.env)
-    def inform_reset(self) -> None:
         if isinstance(self.agent, MCTSAgent):
-            self.agent.inform_reset()
-    def inform_action(self, action: int) -> None:
-        """Update the agent's state as a result of external changes to the environment."""
         if isinstance(self.agent, MCTSAgent):
-            self.agent.inform_action(action)

 from litrl.algo.mcts.rollout import VanillaRollout
 from litrl.common.agent import Agent, RandomMultiAgent
 from litrl.model.sac.multi_agent import OnnxSacDeterministicMultiAgent
+from src.typing import AgentType, CpuConfig, GridResponseType, RolloutPolicy
 class AppState:
     _instance: Self | None = None
     env: ConnectFour
     cpu_config: CpuConfig
+    agent: Agent[Any, int] | None = None
     def setup(self) -> None:
+        """Create the Connect Four environment, the default CPU config and its agent.
+
+        The environment is reset with a fixed seed (123). The default config selects
+        the MCTS agent with 500 simulations, and ``set_agent`` is called to build it.
+        """
         logger.debug("AppState setup called")
         self.env = make_multiagent(id="connect_four", render_mode="rgb_array")
         self.env.reset(seed=123)
+        self.cpu_config: CpuConfig = CpuConfig(agent_type=AgentType.MCTS, simulations=500)
+        self.set_agent(self.cpu_config)  # TODO in properties setter.
     def __new__(cls: type[AppState]) -> AppState:  # noqa: PYI034
         if cls._instance is None:
     def set_config(self, cpu_config: CpuConfig) -> None:
+        """Switch to ``cpu_config`` and update the agent, if it differs from the current config.
+
+        Nothing is changed (only a log line is written) when the config is equal to
+        the one already stored.
+        """
         logger.info(f"new cpu_config: {cpu_config}")
         if cpu_config != self.cpu_config:
+            # set_agent runs before the assignment below so that it can still read the
+            # previous config from self.cpu_config (can_reuse_mcts_computations compares
+            # the old and the new rollout policy).
+            self.set_agent(cpu_config)
             self.cpu_config = cpu_config
         else:
             logger.info("cpu_config unchanged")
+    def create_rollout(self, rollout_policy: RolloutPolicy) -> Agent[Any, Any]:
+        """Return the agent used for MCTS rollouts under ``rollout_policy``.
+
+        ``RolloutPolicy.SAC`` yields the ONNX SAC agent; any other value falls back to
+        a random agent driven by a generator seeded with 123.
+        """
+        if rollout_policy == RolloutPolicy.SAC:
             return OnnxSacDeterministicMultiAgent()
         return RandomMultiAgent(np.random.default_rng(seed=123))
+    def can_reuse_mcts_computations(self, cpu_config: CpuConfig) -> bool:
+        """Tell whether the current MCTS agent can be kept when switching to ``cpu_config``.
+
+        Reuse requires that an ``MCTSAgent`` with an existing ``mcts`` object is
+        installed, that the stored config is an MCTS config, and that the rollout
+        policy is unchanged between the stored config and ``cpu_config``.
+
+        Args:
+            cpu_config: The config about to be applied; ``self.cpu_config`` must
+                still hold the previous config when this is called.
+
+        Returns:
+            True when the existing agent may be kept, False when a new one must be built.
+        """
+        return (
+            self.agent is not None
+            and isinstance(self.agent, MCTSAgent)
+            and self.agent.mcts is not None
+            and self.cpu_config.agent_type == AgentType.MCTS
+            # Results produced under a different rollout policy must not be carried
+            # over, so reuse is only allowed when the policy stays the same.
+            and self.cpu_config.rollout_policy == cpu_config.rollout_policy
+        )
+    def set_agent(self, cpu_config: CpuConfig) -> None:
+        """Install the agent described by ``cpu_config``.
+
+        For MCTS the existing agent is kept (only its simulation budget is updated)
+        when ``can_reuse_mcts_computations`` allows it; otherwise a new ``MCTSAgent``
+        is built. RANDOM and SAC always get a fresh agent.
+
+        Args:
+            cpu_config: The config to apply. Callers may still hold the previous
+                config in ``self.cpu_config``, so every setting is read from this
+                argument rather than from ``self``.
+
+        Raises:
+            ValueError: If reuse was reported possible but no usable MCTS agent exists.
+            NotImplementedError: If ``cpu_config.agent_type`` is not a known agent type.
+        """
+        if cpu_config.agent_type == AgentType.MCTS:
+            if not self.can_reuse_mcts_computations(cpu_config):
+                # fmt: off
+                mcts_config = (
+                    MCTSConfigBuilder()
+                    # Read from the incoming config: self.cpu_config may still be the old one.
+                    .set_simulations(cpu_config.simulations or 50)
+                    .set_rollout_strategy(VanillaRollout(rollout_agent=self.create_rollout(cpu_config.rollout_policy)))
+                ).build()
+                # fmt: on
+                self.agent = MCTSAgent(cfg=mcts_config)
+                logger.debug("set_agent: MCTSAgent")
+            else:
+                # Re-checked here so the type checker can narrow self.agent.
+                if self.agent is None or not isinstance(self.agent, MCTSAgent) or self.agent.mcts is None:
+                    raise ValueError
+                # Same fallback as when building a new agent, so the budget is never None.
+                self.agent.mcts.cfg.simulations = cpu_config.simulations or 50
+        elif cpu_config.agent_type == AgentType.RANDOM:
             self.agent = RandomMultiAgent()
+        elif cpu_config.agent_type == AgentType.SAC:
             self.agent = OnnxSacDeterministicMultiAgent()  # type: ignore[assignment]  # TODO
         else:
+            msg = f"cpu_config.name: {cpu_config.agent_type}"
             raise NotImplementedError(msg)
     def get_action(self) -> int:
+        """Return the action the current agent selects for the current environment.
+
+        Raises:
+            ValueError: If no agent has been set.
+        """
+        if self.agent is None:
+            raise ValueError
         return self.agent.get_action(env=self.env)
+    def step(self, action: int) -> GridResponseType:
+        """Play ``action`` and return the resulting observation.
+
+        With an ``MCTSAgent`` the move goes through ``agent.step(env, action)``
+        (presumably so the agent can track the move itself - TODO confirm);
+        for any other agent the environment is stepped directly.
+        """
         if isinstance(self.agent, MCTSAgent):
+            self.agent.step(self.env, action)
+        else:
+            self.env.step(action)
+        return self.observe()
+    def reset(self) -> GridResponseType:
+        """Reset the game and return the fresh observation.
+
+        With an ``MCTSAgent`` the reset goes through ``agent.reset(env)``
+        (presumably so the agent can clear its own state - TODO confirm);
+        for any other agent the environment is reset directly.
+        """
         if isinstance(self.agent, MCTSAgent):
+            self.agent.reset(self.env)
+        else:
+            self.env.reset()
+        return self.observe()
+    def observe(self) -> GridResponseType:
+        """Return the board as observed by ``player_1`` together with the game-over flag.
+
+        Returns:
+            A ``GridResponseType`` whose ``grid`` is the ``"observation"`` entry of the
+            environment observation converted with ``tolist()``, and whose ``done`` is
+            true when the currently selected agent is terminated or truncated.
+        """
+        obs = self.env.observe("player_1")
+        return GridResponseType(  # type: ignore[no-any-return]
+            grid=obs["observation"].tolist(),
+            done=bool(
+                self.env.terminations[self.env.agent_selection] or self.env.truncations[self.env.agent_selection],
+            ),  # TODO why needed?
+        )

src/typing.py CHANGED Viewed

@@ -1,10 +1,14 @@
 from __future__ import annotations
 import enum
-from typing import Optional
 from pydantic import BaseModel
 class AgentType(enum.Enum):
     RANDOM = "random"
@@ -21,3 +25,12 @@ class CpuConfig(BaseModel):
     agent_type: AgentType
     simulations: Optional[int] = None  # noqa: UP007
     rollout_policy: Optional[RolloutPolicy] = None  # noqa: UP007

 from __future__ import annotations
 import enum
+from typing import List, Optional
 from pydantic import BaseModel
+from litrl.env.connect_four import Board
+ObservationType = List[Board]
 class AgentType(enum.Enum):
     RANDOM = "random"
     agent_type: AgentType
     simulations: Optional[int] = None  # noqa: UP007
     rollout_policy: Optional[RolloutPolicy] = None  # noqa: UP007
+# Response body describing the board: `grid` is the observation as a list of
+# Board values and `done` tells whether the game is over.
+class GridResponseType(BaseModel):
+    grid: ObservationType
+    done: bool
+# Grid response extended with the integer `action` field.
+class BotResponseType(GridResponseType):
+    action: int