-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Action buffer #4612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Action buffer #4612
Changes from all commits
ba8bdcf
6c93137
03f7e47
87d2049
a889e8f
24be76f
12fa45a
ff4f3b8
3e807c6
fed5c20
d119c1a
5a37dfe
1e5e440
194505e
4baaa7a
60337b8
841110a
ebd50b2
2e4dcf2
1337d07
c05c40e
1b96170
c940d41
b0d9a48
d2bb5d0
ad144c3
9090821
f23e395
785848e
9af9ee9
600d307
42bdfce
754f5b8
a8813fc
64091cc
a8204bd
b5ca548
ed11b10
442f29a
8733ec1
199d15b
00a824c
b0ed241
bfaa249
d927497
0d33e1f
8f06a67
da1c85a
080f3eb
f872359
5886f74
fe8fdd9
9479a65
3a90973
dbf819c
d1e2b97
e87effe
e0418dc
5f571a1
9089e63
f8d85fa
c21d223
f0f4249
d6eaf8d
e9848b1
b25fc3d
10944f1
6fcdd3f
6d4738b
5c8ec2d
0441118
86b6d71
2bf004c
aaf6c59
056cf6d
5691f60
b567fcd
116580a
b152511
bb9988c
c488e8e
589907a
c8ae8da
c651ebc
0dc4396
434f210
714b444
65d17fe
4fc60d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,6 +244,43 @@ def empty(spec: "BehaviorSpec") -> "TerminalSteps": | |
) | ||
|
||
|
||
class ActionTuple:
    """
    An object whose fields correspond to actions of different types.
    Continuous and discrete actions are numpy arrays of type float32 and
    int32, respectively, and are type checked on construction.
    Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
    respectively. Either field may be omitted (None), in which case it
    defaults to a zero-width (n_agents, 0) array whose first dimension
    matches the field that was provided.
    """

    def __init__(
        self,
        continuous: "Optional[np.ndarray]" = None,
        discrete: "Optional[np.ndarray]" = None,
    ):
        # Fill in a zero-width array for any omitted field so that both
        # properties always return a 2D array with a consistent n_agents.
        if continuous is None:
            n_agents = 0 if discrete is None else discrete.shape[0]
            continuous = np.zeros((n_agents, 0), dtype=np.float32)
        if discrete is None:
            discrete = np.zeros((continuous.shape[0], 0), dtype=np.int32)
        # Enforce canonical dtypes; copy=False avoids a copy when the
        # input already has the right dtype.
        if continuous.dtype != np.float32:
            continuous = continuous.astype(np.float32, copy=False)
        self._continuous = continuous
        if discrete.dtype != np.int32:
            discrete = discrete.astype(np.int32, copy=False)
        self._discrete = discrete

    @property
    def continuous(self) -> np.ndarray:
        # float32 array of shape (n_agents, continuous_size)
        return self._continuous

    @property
    def discrete(self) -> np.ndarray:
        # int32 array of shape (n_agents, discrete_size)
        return self._discrete

    @staticmethod
    def create_continuous(continuous: np.ndarray) -> "ActionTuple":
        """Build an ActionTuple carrying only a continuous component."""
        discrete = np.zeros((continuous.shape[0], 0), dtype=np.int32)
        return ActionTuple(continuous, discrete)

    @staticmethod
    def create_discrete(discrete: np.ndarray) -> "ActionTuple":
        """Build an ActionTuple carrying only a discrete component."""
        continuous = np.zeros((discrete.shape[0], 0), dtype=np.float32)
        return ActionTuple(continuous, discrete)
|
||
|
||
class ActionSpec(NamedTuple): | ||
""" | ||
A NamedTuple containing utility functions and information about the action spaces | ||
|
@@ -287,62 +324,61 @@ def discrete_size(self) -> int: | |
""" | ||
return len(self.discrete_branches) | ||
|
||
def empty_action(self, n_agents: int) -> ActionTuple:
    """
    Build an all-zeros ActionTuple for a number of agents.
    :param n_agents: The number of agents that will have actions generated
    """
    zero_cont = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
    zero_disc = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
    return ActionTuple(zero_cont, zero_disc)
|
||
def random_action(self, n_agents: int) -> ActionTuple:
    """
    Draw a uniformly random ActionTuple for a number of agents.
    Continuous components are sampled from [-1, 1); each discrete branch
    is sampled independently over its own number of choices.
    :param n_agents: The number of agents that will have actions generated
    """
    sampled_continuous = np.random.uniform(
        low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
    )
    sampled_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
    if self.discrete_size > 0:
        # One column per branch, each drawn over that branch's cardinality.
        branch_columns = [
            np.random.randint(
                0,
                self.discrete_branches[branch],  # type: ignore
                size=(n_agents),
                dtype=np.int32,
            )
            for branch in range(self.discrete_size)
        ]
        sampled_discrete = np.column_stack(branch_columns)
    return ActionTuple(sampled_continuous, sampled_discrete)
|
||
def _validate_action(
    self, actions: ActionTuple, n_agents: int, name: str
) -> ActionTuple:
    """
    Check that the given ActionTuple carries arrays of the expected
    shape for the given number of agents.
    :param actions: the ActionTuple to validate
    :param n_agents: expected first dimension of both action arrays
    :param name: behavior name, used only in error messages
    :return: the validated ActionTuple, unchanged
    :raises UnityActionException: when a shape does not match the spec
    """
    cont_shape = (n_agents, self.continuous_size)
    if self.continuous_size > 0 and actions.continuous.shape != cont_shape:
        raise UnityActionException(
            f"The behavior {name} needs a continuous input of dimension "
            f"{cont_shape} for (<number of agents>, <action size>) but "
            f"received input of dimension {actions.continuous.shape}"
        )
    disc_shape = (n_agents, self.discrete_size)
    if self.discrete_size > 0 and actions.discrete.shape != disc_shape:
        raise UnityActionException(
            f"The behavior {name} needs a discrete input of dimension "
            f"{disc_shape} for (<number of agents>, <action size>) but "
            f"received input of dimension {actions.discrete.shape}"
        )
    return actions
|
||
@staticmethod | ||
|
@@ -420,27 +456,30 @@ def behavior_specs(self) -> MappingType[str, BehaviorSpec]: | |
""" | ||
|
||
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
    """
    Assigns the next-step action for every agent of a behavior. Actions
    must follow the same agent ordering as the corresponding DecisionSteps.
    :param behavior_name: The name of the behavior the agents are part of
    :param action: ActionTuple of continuous and/or discrete actions.
    The arrays are np.ndarrays of dimensions (n_agents, continuous_size)
    and (n_agents, discrete_size), respectively.
    """
|
||
@abstractmethod
def set_action_for_agent(
    self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
    """
    Assigns the next-step action for a single agent in the simulation.
    :param behavior_name: The name of the behavior the agent is part of
    :param agent_id: The id of the agent the action is set for
    :param action: ActionTuple of continuous and/or discrete actions.
    The arrays are np.ndarrays of dimensions (1, continuous_size) and
    (1, discrete_size), respectively; the leading dimension of 1 is
    because this action targets a single agent.
    """
|
||
@abstractmethod | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -18,6 +18,7 @@ | |||||
DecisionSteps, | ||||||
TerminalSteps, | ||||||
BehaviorSpec, | ||||||
ActionTuple, | ||||||
BehaviorName, | ||||||
AgentId, | ||||||
BehaviorMapping, | ||||||
|
@@ -236,7 +237,7 @@ def __init__( | |||||
|
||||||
self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {} | ||||||
self._env_specs: Dict[str, BehaviorSpec] = {} | ||||||
self._env_actions: Dict[str, np.ndarray] = {} | ||||||
self._env_actions: Dict[str, ActionTuple] = {} | ||||||
self._is_first_message = True | ||||||
self._update_behavior_specs(aca_output) | ||||||
|
||||||
|
@@ -336,7 +337,7 @@ def _assert_behavior_exists(self, behavior_name: str) -> None: | |||||
f"agent group in the environment" | ||||||
) | ||||||
|
||||||
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None: | ||||||
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None: | ||||||
self._assert_behavior_exists(behavior_name) | ||||||
if behavior_name not in self._env_state: | ||||||
return | ||||||
|
@@ -346,7 +347,7 @@ def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None: | |||||
self._env_actions[behavior_name] = action | ||||||
|
||||||
def set_action_for_agent( | ||||||
andrewcoh marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray | ||||||
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple | ||||||
) -> None: | ||||||
self._assert_behavior_exists(behavior_name) | ||||||
if behavior_name not in self._env_state: | ||||||
|
@@ -366,7 +367,10 @@ def set_action_for_agent( | |||||
agent_id | ||||||
) | ||||||
) from ie | ||||||
self._env_actions[behavior_name][index] = action | ||||||
if action_spec.continuous_size > 0: | ||||||
self._env_actions[behavior_name].continuous[index] = action.continuous[0, :] | ||||||
if action_spec.discrete_size > 0: | ||||||
self._env_actions[behavior_name].discrete[index] = action.discrete[0, :] | ||||||
|
||||||
def get_steps( | ||||||
self, behavior_name: BehaviorName | ||||||
|
@@ -410,15 +414,20 @@ def _close(self, timeout: Optional[int] = None) -> None: | |||||
|
||||||
@timed | ||||||
def _generate_step_input( | ||||||
self, vector_action: Dict[str, np.ndarray] | ||||||
self, vector_action: Dict[str, ActionTuple] | ||||||
) -> UnityInputProto: | ||||||
rl_in = UnityRLInputProto() | ||||||
for b in vector_action: | ||||||
n_agents = len(self._env_state[b][0]) | ||||||
if n_agents == 0: | ||||||
continue | ||||||
for i in range(n_agents): | ||||||
action = AgentActionProto(vector_actions=vector_action[b][i]) | ||||||
# TODO: This check will be removed when the proto supports hybrid actions | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
if vector_action[b].continuous.shape[1] > 0: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why check on the shape[1] rather than action_specs.continuous_size? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ActionSpec is not available here. This is the only way I think we can determine if the action space is continuous or discrete in this function. Critically, this is just to support the old proto until the new proto is merged. This is just temporary so that the communication protocol works until the new protos are in place. |
||||||
_act = vector_action[b].continuous[i] | ||||||
else: | ||||||
_act = vector_action[b].discrete[i] | ||||||
action = AgentActionProto(vector_actions=_act) | ||||||
rl_in.agent_actions[b].value.extend([action]) | ||||||
rl_in.command = STEP | ||||||
rl_in.side_channel = bytes( | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need some constructor that will take only continuous or only discrete so the user does not have to create an empty array when using only discrete or only continuous.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't the default be
None
but in the constructor assigns an empty array when None is specified? This is a common pattern for mutable default parameters