Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ and this project adheres to
### Bug Fixes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug in multi-agent cooperative training where agents might not receive all of the states of
terminated teammates. (#5441)

## [2.1.0-exp.1] - 2021-06-09
### Minor Changes
Expand Down
8 changes: 6 additions & 2 deletions ml-agents/mlagents/trainers/agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ def add_experiences(
self._process_step(
terminal_step, worker_id, terminal_steps.agent_id_to_index[local_id]
)
# Clear the last seen group obs when agents die.
self._clear_group_status_and_obs(global_id)

# Iterate over all the decision steps, first gather all the group obs
# and then create the trajectories. _add_to_group_status
Expand All @@ -135,6 +133,12 @@ def add_experiences(
self._process_step(
ongoing_step, worker_id, decision_steps.agent_id_to_index[local_id]
)
# Clear the last seen group obs when agents die, but only after all of the group
# statuses were added to the trajectory.
for terminal_step in terminal_steps.values():
local_id = terminal_step.agent_id
global_id = get_global_agent_id(worker_id, local_id)
self._clear_group_status_and_obs(global_id)

for _gid in action_global_agent_ids:
# If the ID doesn't have a last step result, the agent just reset,
Expand Down
8 changes: 6 additions & 2 deletions ml-agents/mlagents/trainers/tests/mock_brain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Tuple
from typing import List, Optional, Tuple
import numpy as np

from mlagents.trainers.buffer import AgentBuffer, AgentBufferKey
Expand All @@ -21,6 +21,7 @@ def create_mock_steps(
action_spec: ActionSpec,
done: bool = False,
grouped: bool = False,
agent_ids: Optional[List[int]] = None,
) -> Tuple[DecisionSteps, TerminalSteps]:
"""
Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
Expand All @@ -43,7 +44,10 @@ def create_mock_steps(

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
if agent_ids is not None:
agent_id = np.array(agent_ids, dtype=np.int32)
else:
agent_id = np.arange(num_agents, dtype=np.int32)
_gid = 1 if grouped else 0
group_id = np.array(num_agents * [_gid], dtype=np.int32)
group_reward = np.array(num_agents * [0.0], dtype=np.float32)
Expand Down
34 changes: 28 additions & 6 deletions ml-agents/mlagents/trainers/tests/test_agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,32 +137,54 @@ def test_group_statuses():
)

# Make terminal steps for some dead agents
mock_decision_steps_2, mock_terminal_steps_2 = mb.create_mock_steps(
_, mock_terminal_steps_2 = mb.create_mock_steps(
num_agents=2,
observation_specs=create_observation_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=True,
grouped=True,
agent_ids=[2, 3],
)
# Make decision steps continue for other agents
mock_decision_steps_2, _ = mb.create_mock_steps(
num_agents=2,
observation_specs=create_observation_specs_with_shapes([(8,)]),
action_spec=ActionSpec.create_continuous(2),
done=False,
grouped=True,
agent_ids=[0, 1],
)

processor.add_experiences(
mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
)
fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
# Continue to add for remaining live agents
fake_action_info = _create_action_info(4, mock_decision_steps_2.agent_id)
for _ in range(3):
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, fake_action_info
mock_decision_steps_2, mock_terminal_steps, 0, fake_action_info
)

# Assert that four trajectories have been added to the Trainer
assert len(tqueue.put.call_args_list) == 4
# Last trajectory should be the longest

# Get the first trajectory, which should have been agent 2 (one of the killed agents)
trajectory = tqueue.put.call_args_list[0][0][-1]
assert len(trajectory.steps) == 3
# Make sure trajectory has the right Groupmate Experiences.
# All three steps should contain all agents
for step in trajectory.steps:
assert len(step.group_status) == 3

# Last trajectory should be the longest. It should be that of agent 1, one of the surviving agents.
trajectory = tqueue.put.call_args_list[-1][0][-1]
assert len(trajectory.steps) == 5

# Make sure trajectory has the right Groupmate Experiences
# Make sure trajectory has the right Groupmate Experiences.
# The first 3 steps should contain all of the obs (the 3rd step is also the terminal step for 2 of the agents)
for step in trajectory.steps[0:3]:
assert len(step.group_status) == 3
# After 2 agents has died
# After 2 agents have died, there should only be 1 group status.
for step in trajectory.steps[3:]:
assert len(step.group_status) == 1

Expand Down