Skip to content

Commit 0ce2dab

Browse files
authored
Separate send environment data from reset (#4128)
1 parent 20527d1 commit 0ce2dab

File tree

4 files changed

+36
-7
lines changed

4 files changed

+36
-7
lines changed

ml-agents/mlagents/trainers/env_manager.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ def reset(self, config: Dict = None) -> int:
6767
self.first_step_infos = self._reset_env(config)
6868
return len(self.first_step_infos)
6969

70+
@abstractmethod
71+
def set_env_parameters(self, config: Dict = None) -> None:
72+
"""
73+
Sends environment parameter settings to C# via the
74+
EnvironmentParametersSidechannel.
75+
:param config: Dict of environment parameter keys and values
76+
"""
77+
pass
78+
7079
@property
7180
@abstractmethod
7281
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:

ml-agents/mlagents/trainers/learn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def maybe_add_samplers(
205205
for offset, v in enumerate(sampler_config.values()):
206206
if v.seed == -1:
207207
v.seed = run_seed + offset
208-
env.reset(config=sampler_config)
208+
env.set_env_parameters(config=sampler_config)
209209

210210

211211
def try_create_meta_curriculum(

ml-agents/mlagents/trainers/simple_env_manager.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,24 @@ def _step(self) -> List[EnvironmentStep]:
4343
def _reset_env(
4444
self, config: Dict[BehaviorName, float] = None
4545
) -> List[EnvironmentStep]: # type: ignore
46+
self.set_env_parameters(config)
47+
self.env.reset()
48+
all_step_result = self._generate_all_results()
49+
self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
50+
return [self.previous_step]
51+
52+
def set_env_parameters(self, config: Dict = None) -> None:
53+
"""
54+
Sends environment parameter settings to C# via the
55+
EnvironmentParametersSidechannel.
56+
:param config: Dict of environment parameter keys and values
57+
"""
4658
if config is not None:
4759
for k, v in config.items():
4860
if isinstance(v, float):
4961
self.env_params.set_float_parameter(k, v)
5062
elif isinstance(v, ParameterRandomizationSettings):
5163
v.apply(k, self.env_params)
52-
self.env.reset()
53-
all_step_result = self._generate_all_results()
54-
self.previous_step = EnvironmentStep(all_step_result, 0, {}, {})
55-
return [self.previous_step]
5664

5765
@property
5866
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:

ml-agents/mlagents/trainers/subprocess_env_manager.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
class EnvironmentCommand(enum.Enum):
4747
STEP = 1
4848
EXTERNAL_BRAINS = 2
49-
GET_PROPERTIES = 3
49+
ENVIRONMENT_PARAMETERS = 3
5050
RESET = 4
5151
CLOSE = 5
5252
ENV_EXITED = 6
@@ -174,12 +174,13 @@ def external_brains():
174174
reset_timers()
175175
elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
176176
_send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains())
177-
elif req.cmd == EnvironmentCommand.RESET:
177+
elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
178178
for k, v in req.payload.items():
179179
if isinstance(v, float):
180180
env_parameters.set_float_parameter(k, v)
181181
elif isinstance(v, ParameterRandomizationSettings):
182182
v.apply(k, env_parameters)
183+
elif req.cmd == EnvironmentCommand.RESET:
183184
env.reset()
184185
all_step_result = _generate_all_results()
185186
_send_response(EnvironmentCommand.RESET, all_step_result)
@@ -291,6 +292,8 @@ def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
291292
if not self.step_queue.empty():
292293
step = self.step_queue.get_nowait()
293294
self.env_workers[step.worker_id].waiting = False
295+
# Send config to environment
296+
self.set_env_parameters(config)
294297
# First enqueue reset commands for all workers so that they reset in parallel
295298
for ew in self.env_workers:
296299
ew.send(EnvironmentCommand.RESET, config)
@@ -299,6 +302,15 @@ def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
299302
ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {}, {})
300303
return list(map(lambda ew: ew.previous_step, self.env_workers))
301304

305+
def set_env_parameters(self, config: Dict = None) -> None:
306+
"""
307+
Sends environment parameter settings to C# via the
308+
EnvironmentParametersSidechannel for each worker.
309+
:param config: Dict of environment parameter keys and values
310+
"""
311+
for ew in self.env_workers:
312+
ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, config)
313+
302314
@property
303315
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
304316
self.env_workers[0].send(EnvironmentCommand.EXTERNAL_BRAINS)

0 commit comments

Comments
 (0)