Source code for sustaingym.envs.building.multiagent_env

"""
The module implements a multi-agent version of the building environment.
"""

from __future__ import annotations

from collections.abc import Mapping
from typing import Any

from gymnasium import spaces
import numpy as np
from pettingzoo import ParallelEnv

from .env import BuildingEnv


class MultiAgentBuildingEnv(ParallelEnv):
    """Multi-agent building environment.

    Each agent controls the AC unit in a single zone. Agent IDs are integers.

    This environment's API is known to be compatible with PettingZoo v1.24.1.

    Args:
        parameters: dict of parameters for the environment (see `BuildingEnv`)

    Attributes:
        # attributes required by pettingzoo.ParallelEnv
        agents: list[int], agent IDs, indices of zones with AC units
        possible_agents: list[int], same as agents
        observation_spaces: dict[int, spaces.Box], observation space for each agent
        action_spaces: dict[int, spaces.Box], action space for each agent

        # attributes specific to MultiAgentBuildingEnv
        single_env: BuildingEnv
    """
    # PettingZoo API
    # TODO: check if still needed
    # metadata = {}

    def __init__(self, parameters: dict[str, Any]) -> None:
        super().__init__()

        # Create internal single-agent environment
        self.single_env = BuildingEnv(parameters)

        # PettingZoo API
        # zones with AC units
        self.possible_agents = np.nonzero(self.single_env.ac_map)[0].tolist()
        self.agents = self.possible_agents[:]
        self.observation_spaces = {
            agent: self.single_env.observation_space for agent in self.agents
        }
        if self.single_env.is_continuous_action:
            self.action_spaces = {
                agent: spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
                for agent in self.agents
            }
        else:
            assert isinstance(self.single_env.action_space, spaces.MultiDiscrete)
            self.action_spaces = {
                agent: self.single_env.action_space[agent] for agent in self.agents
            }
    def step(self, actions: Mapping[int, np.ndarray]) -> tuple[
        dict[int, np.ndarray],
        dict[int, float],
        dict[int, bool],
        dict[int, bool],
        dict[int, dict[str, Any]],
    ]:
        """
        Returns:
            obss, rewards, terminateds, truncateds, infos
        """
        # Build the joint action for the underlying single-agent environment.
        # `actions` is keyed by agent ID (zone index), per the ParallelEnv API.
        action = np.zeros(self.single_env.n, dtype=np.float32)
        for agent in self.agents:
            action[agent] = actions[agent]

        # Use internal single-agent environment
        obs, reward, terminated, truncated, info = self.single_env.step(action)
        self._state = obs

        obss, rewards, terminateds, truncateds, infos = {}, {}, {}, {}, {}
        for agent in self.agents:
            obss[agent] = obs
            rewards[agent] = reward
            terminateds[agent] = terminated
            truncateds[agent] = truncated
            infos[agent] = info

        # Delete all agents when day is finished
        if terminated or truncated:
            self.agents = []

        return obss, rewards, terminateds, truncateds, infos
    def reset(
        self, seed: int | None = None, options: dict | None = None
    ) -> tuple[dict[int, np.ndarray], dict[int, dict[str, Any]]]:
        """Resets the environment."""
        obs, info = self.single_env.reset(seed=seed, options=options)
        self._state = obs
        self.agents = self.possible_agents[:]

        obss, infos = {}, {}
        for agent in self.agents:
            obss[agent] = obs
            infos[agent] = info
        return obss, infos

    def render(self) -> None:
        """Render environment."""
        self.single_env.render()

    def close(self) -> None:
        """Close the environment."""
        self.single_env.close()

    def state(self) -> np.ndarray:
        return self._state

    def observation_space(self, agent: int) -> spaces.Space:
        return self.observation_spaces[agent]

    def action_space(self, agent: int) -> spaces.Box | spaces.Discrete:
        return self.action_spaces[agent]
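

# ---------------------------------------------------------------------------
# Usage sketch (not part of the class above): a minimal, hypothetical example
# of driving MultiAgentBuildingEnv through the PettingZoo ParallelEnv
# interface -- reset, then step with a dict of per-agent actions keyed by
# agent ID (zone index). The parameter-construction helper, its import path,
# and its arguments are assumptions and may differ in the installed package.

if __name__ == "__main__":
    # Assumed helper for building the `parameters` dict expected by BuildingEnv.
    from sustaingym.envs.building import ParameterGenerator  # assumed import path

    parameters = ParameterGenerator("OfficeSmall", "Hot_Dry", "Tucson")  # assumed arguments
    env = MultiAgentBuildingEnv(parameters)

    obss, infos = env.reset(seed=0)
    while env.agents:
        # Sample one action per live agent, keyed by agent ID (the zone index).
        actions = {agent: env.action_space(agent).sample() for agent in env.agents}
        obss, rewards, terminateds, truncateds, infos = env.step(actions)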