From ca71d180b8df28aad96738545a28b8d4b0a00151 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 22 Jun 2020 14:42:16 -0700 Subject: [PATCH 01/26] Introduced the Constant Parameter Sampler that will be useful later as samplers and floats can be used interchangeably --- ml-agents/mlagents/trainers/settings.py | 48 +++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index cbf032f6c6..beb6cab90f 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -180,6 +180,7 @@ def to_settings(self) -> type: ParameterRandomizationType.UNIFORM: UniformSettings, ParameterRandomizationType.GAUSSIAN: GaussianSettings, ParameterRandomizationType.MULTIRANGEUNIFORM: MultiRangeUniformSettings, + # Constant type is handled if a float is provided instead of a config } return _mapping[self] @@ -199,28 +200,33 @@ def structure(d: Mapping, t: type) -> Any: raise TrainerConfigError( f"Unsupported parameter randomization configuration {d}." ) - d_final: Dict[str, List[float]] = {} + d_final: Dict[str, ParameterRandomizationSettings] = {} for environment_parameter, environment_parameter_config in d.items(): if environment_parameter == "resampling-interval": logger.warning( "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." ) continue - if "sampler_type" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_type." + if isinstance(environment_parameter_config, (float, int)): + d_final[environment_parameter] = ConstantSettings( + value=float(environment_parameter_config) ) - if "sampler_parameters" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_parameters." + else: + if "sampler_type" not in environment_parameter_config: + raise TrainerConfigError( + f"Sampler configuration for {environment_parameter} does not contain sampler_type." + ) + if "sampler_parameters" not in environment_parameter_config: + raise TrainerConfigError( + f"Sampler configuration for {environment_parameter} does not contain sampler_parameters." + ) + enum_key = ParameterRandomizationType( + environment_parameter_config["sampler_type"] + ) + t = enum_key.to_settings() + d_final[environment_parameter] = strict_to_cls( + environment_parameter_config["sampler_parameters"], t ) - enum_key = ParameterRandomizationType( - environment_parameter_config["sampler_type"] - ) - t = enum_key.to_settings() - d_final[environment_parameter] = strict_to_cls( - environment_parameter_config["sampler_parameters"], t - ) return d_final @abc.abstractmethod @@ -234,6 +240,20 @@ def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: pass +@attr.s(auto_attribs=True) +class ConstantSettings(ParameterRandomizationSettings): + value: float = 0.0 + + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the constant sampler type set method. 
+ :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + env_channel.set_float_parameter(key, self.value) + + @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): min_value: float = attr.ib() From 6bb72962d8335940dc1e289908edd3f916558748 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 22 Jun 2020 16:59:02 -0700 Subject: [PATCH 02/26] Refactored the settings.py to refect the new format of the config.yaml --- ml-agents/mlagents/trainers/settings.py | 184 ++++++++++++++++-------- 1 file changed, 126 insertions(+), 58 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index beb6cab90f..6e0069cc39 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -119,6 +119,7 @@ def _reward_signal_steps_per_update_default(self): return self.steps_per_update +# INTRINSIC REWARD SIGNALS ############################################################# class RewardSignalType(Enum): EXTRINSIC: str = "extrinsic" GAIL: str = "gail" @@ -170,6 +171,7 @@ class CuriositySettings(RewardSignalSettings): learning_rate: float = 3e-4 +# SAMPLERS ############################################################################# class ParameterRandomizationType(Enum): UNIFORM: str = "uniform" GAUSSIAN: str = "gaussian" @@ -189,46 +191,6 @@ def to_settings(self) -> type: class ParameterRandomizationSettings(abc.ABC): seed: int = parser.get_default("seed") - @staticmethod - def structure(d: Mapping, t: type) -> Any: - """ - Helper method to structure a Dict of ParameterRandomizationSettings class. Meant to be registered with - cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle - the special Enum selection of ParameterRandomizationSettings classes. - """ - if not isinstance(d, Mapping): - raise TrainerConfigError( - f"Unsupported parameter randomization configuration {d}." - ) - d_final: Dict[str, ParameterRandomizationSettings] = {} - for environment_parameter, environment_parameter_config in d.items(): - if environment_parameter == "resampling-interval": - logger.warning( - "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." - ) - continue - if isinstance(environment_parameter_config, (float, int)): - d_final[environment_parameter] = ConstantSettings( - value=float(environment_parameter_config) - ) - else: - if "sampler_type" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_type." - ) - if "sampler_parameters" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_parameters." 
- ) - enum_key = ParameterRandomizationType( - environment_parameter_config["sampler_type"] - ) - t = enum_key.to_settings() - d_final[environment_parameter] = strict_to_cls( - environment_parameter_config["sampler_parameters"], t - ) - return d_final - @abc.abstractmethod def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: """ @@ -332,6 +294,127 @@ def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: ) +# ENVIRONMENT PARAMETERS ############################################################### +@attr.s(auto_attribs=True) +class NextLessonTriggerSettings: + class MeasureType(Enum): + PROGRESS: str = "progress" + REWARD: str = "reward" + + measure: MeasureType = attr.ib(default=MeasureType.REWARD) + behavior: str = attr.ib(default="") + min_lesson_length: int = 0 + signal_smoothing: bool = True + threshold: float = attr.ib(default=0.0) + + @threshold.validator + def _check_threshold_value(self, attribute, value): + if self.threshold > 1.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be greater than 1." + ) + if self.threshold < 0.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be greater negative." + ) + + +@attr.s(auto_attribs=True) +class Lesson: + next_lesson_trigger: Optional[NextLessonTriggerSettings] + sampler: ParameterRandomizationSettings + name: str + + +@attr.s(auto_attribs=True) +class EnvironmentParameterSettings: + lessons: List[Lesson] + + @staticmethod + def _sampler_from_config( + environment_parameter_config: Mapping + ) -> Optional[ParameterRandomizationSettings]: + if isinstance(environment_parameter_config, (float, int)): + sampler = ConstantSettings(value=float(environment_parameter_config)) + return sampler + elif "sampler_type" in environment_parameter_config: + # This is the non-constant sampler case + enum_key = ParameterRandomizationType( + environment_parameter_config["sampler_type"] + ) + t = enum_key.to_settings() + sampler = strict_to_cls( + environment_parameter_config["sampler_parameters"], t + ) + return sampler + return None + + @staticmethod + def structure(d: Mapping, t: type) -> Any: + """ + Helper method to structure a Dict of EnvironmentParameterSettings class. Meant to be registered with + cattr.register_structure_hook() and called with cattr.structure(). + """ + if not isinstance(d, Mapping): + raise TrainerConfigError( + f"Unsupported parameter environment parameter settings {d}." 
+ ) + d_final: Dict[str, EnvironmentParameterSettings] = {} + for environment_parameter, environment_parameter_config in d.items(): + maybe_sampler = EnvironmentParameterSettings._sampler_from_config( + environment_parameter_config + ) + if maybe_sampler is not None: + d_final[environment_parameter] = EnvironmentParameterSettings( + lessons=[ + Lesson( + next_lesson_trigger=None, + sampler=maybe_sampler, + name=environment_parameter, + ) + ] + ) + elif "curriculum" in environment_parameter_config: + # This is the curriculum case + lessons: List[Lesson] = [] + for lesson_dict in environment_parameter_config["curriculum"]: + # a lesson_dict contains a single lesson with the name of the lesson as key + next_lesson_trigger = None + maybe_sampler = None + lesson_name = list(lesson_dict.keys())[0] + lesson_config = list(lesson_dict.values())[0] + if "next_lesson_trigger" in lesson_config: + next_lesson_trigger = strict_to_cls( + lesson_config["next_lesson_trigger"], + NextLessonTriggerSettings, + ) + if "value" in lesson_config: + maybe_sampler = EnvironmentParameterSettings._sampler_from_config( + lesson_config["value"] + ) + print(lesson_config["value"], maybe_sampler) + if "value" not in lesson_config or maybe_sampler is None: + raise TrainerConfigError( + f"Parameter {environment_parameter} in lesson {lesson_name} does not contain a valid value." + ) + lessons.append( + Lesson( + next_lesson_trigger=next_lesson_trigger, + sampler=maybe_sampler, + name=lesson_name, + ) + ) + d_final[environment_parameter] = EnvironmentParameterSettings( + lessons=lessons + ) + else: + raise TrainerConfigError( + f"The parameter {environment_parameter} does not contain a valid value." + ) + return d_final + + +# TRAINERS ############################################################################# @attr.s(auto_attribs=True) class SelfPlaySettings: save_steps: int = 20000 @@ -433,19 +516,7 @@ def structure(d: Mapping, t: type) -> Any: return t(**d_copy) -@attr.s(auto_attribs=True) -class CurriculumSettings: - class MeasureType: - PROGRESS: str = "progress" - REWARD: str = "reward" - - measure: str = attr.ib(default=MeasureType.REWARD) - thresholds: List[float] = attr.ib(factory=list) - min_lesson_length: int = 0 - signal_smoothing: bool = True - parameters: Dict[str, List[float]] = attr.ib(kw_only=True) - - +# COMMAND LINE ######################################################################### @attr.s(auto_attribs=True) class CheckpointSettings: run_id: str = parser.get_default("run_id") @@ -484,8 +555,7 @@ class RunOptions(ExportableSettings): ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) engine_settings: EngineSettings = attr.ib(factory=EngineSettings) - parameter_randomization: Optional[Dict[str, ParameterRandomizationSettings]] = None - curriculum: Optional[Dict[str, CurriculumSettings]] = None + environment_parameters: Optional[Dict[str, EnvironmentParameterSettings]] = None checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings) # These are options that are relevant to the run itself, and not the engine or environment. 
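
For reference, a rough sketch of the new config.yaml layout that the refactored settings above are meant to parse, where the old parameter_randomization and curriculum sections are folded into the single environment_parameters field: a plain number is read as a constant, a sampler_type/sampler_parameters block as a sampler, and a curriculum list as an ordered list of named lessons. The parameter name gravity, the lesson names, and the numeric values are illustrative placeholders; mass, scale and the 3DBall behavior follow the 3DBall_randomize.yaml updated later in this series, and the next_lesson_trigger key is the one defined at this point in the series (a later patch renames it to completion_criteria).

environment_parameters:
  mass: 3.5
  gravity:
    sampler_type: uniform
    sampler_parameters:
      min_value: 7.0
      max_value: 12.0
  scale:
    curriculum:
      - ScaleLessonOne:
          next_lesson_trigger:
            measure: progress
            behavior: 3DBall
            threshold: 0.5
            min_lesson_length: 100
          value: 1
      - ScaleLessonTwo:
          value:
            sampler_type: uniform
            sampler_parameters:
              min_value: 0.75
              max_value: 3
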
@@ -496,10 +566,8 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook(EngineSettings, strict_to_cls) cattr.register_structure_hook(CheckpointSettings, strict_to_cls) cattr.register_structure_hook( - Dict[str, ParameterRandomizationSettings], - ParameterRandomizationSettings.structure, + Dict[str, EnvironmentParameterSettings], EnvironmentParameterSettings.structure ) - cattr.register_structure_hook(CurriculumSettings, strict_to_cls) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict From bced26200716df16d0c163f413e48b48f7afe5c6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 23 Jun 2020 17:35:43 -0700 Subject: [PATCH 03/26] First working version --- config/ppo/3DBall_randomize.yaml | 24 ++- ml-agents/mlagents/trainers/curriculum.py | 91 ----------- ml-agents/mlagents/trainers/env_manager.py | 2 +- .../trainers/environment_parameter_manager.py | 140 +++++++++++++++++ ml-agents/mlagents/trainers/learn.py | 47 ++---- .../mlagents/trainers/meta_curriculum.py | 148 ------------------ ml-agents/mlagents/trainers/settings.py | 80 +++++++--- .../trainers/subprocess_env_manager.py | 4 +- .../mlagents/trainers/trainer_controller.py | 94 ++++------- ml-agents/mlagents/trainers/trainer_util.py | 26 ++- 10 files changed, 277 insertions(+), 379 deletions(-) delete mode 100644 ml-agents/mlagents/trainers/curriculum.py create mode 100644 ml-agents/mlagents/trainers/environment_parameter_manager.py delete mode 100644 ml-agents/mlagents/trainers/meta_curriculum.py diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index 2f3608b880..e3eb5e453c 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -25,14 +25,26 @@ behaviors: summary_freq: 12000 threaded: true -parameter_randomization: +environment_parameters: mass: sampler_type: uniform sampler_parameters: min_value: 0.5 max_value: 10 - scale: - sampler_type: uniform - sampler_parameters: - min_value: 0.75 - max_value: 3 + scale: + curriculum: + - FirstLessonForScale: + completion_criteria: + measure: reward + behavior: 3DBall + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - SecondLessonForScale: + value: + sampler_type: uniform + sampler_parameters: + min_value: 0.75 + max_value: 3 + diff --git a/ml-agents/mlagents/trainers/curriculum.py b/ml-agents/mlagents/trainers/curriculum.py deleted file mode 100644 index f81fb26c38..0000000000 --- a/ml-agents/mlagents/trainers/curriculum.py +++ /dev/null @@ -1,91 +0,0 @@ -import math -from typing import Dict, Any - -from mlagents.trainers.exception import CurriculumConfigError - -from mlagents_envs.logging_util import get_logger -from mlagents.trainers.settings import CurriculumSettings - -logger = get_logger(__name__) - - -class Curriculum: - def __init__(self, brain_name: str, settings: CurriculumSettings): - """ - Initializes a Curriculum object. 
- :param brain_name: Name of the brain this Curriculum is associated with - :param config: Dictionary of fields needed to configure the Curriculum - """ - self.max_lesson_num = 0 - self.measure = None - self._lesson_num = 0 - self.brain_name = brain_name - self.settings = settings - - self.smoothing_value = 0.0 - self.measure = self.settings.measure - self.min_lesson_length = self.settings.min_lesson_length - self.max_lesson_num = len(self.settings.thresholds) - - parameters = self.settings.parameters - for key in parameters: - if len(parameters[key]) != self.max_lesson_num + 1: - raise CurriculumConfigError( - f"The parameter {key} in {brain_name}'s curriculum must have {self.max_lesson_num + 1} values " - f"but {len(parameters[key])} were found" - ) - - @property - def lesson_num(self) -> int: - return self._lesson_num - - @lesson_num.setter - def lesson_num(self, lesson_num: int) -> None: - self._lesson_num = max(0, min(lesson_num, self.max_lesson_num)) - - def increment_lesson(self, measure_val: float) -> bool: - """ - Increments the lesson number depending on the progress given. - :param measure_val: Measure of progress (either reward or percentage - steps completed). - :return Whether the lesson was incremented. - """ - if not self.settings or not measure_val or math.isnan(measure_val): - return False - if self.settings.signal_smoothing: - measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val - self.smoothing_value = measure_val - if self.lesson_num < self.max_lesson_num: - if measure_val > self.settings.thresholds[self.lesson_num]: - self.lesson_num += 1 - config = {} - parameters = self.settings.parameters - for key in parameters: - config[key] = parameters[key][self.lesson_num] - logger.info( - "{0} lesson changed. Now in lesson {1}: {2}".format( - self.brain_name, - self.lesson_num, - ", ".join([str(x) + " -> " + str(config[x]) for x in config]), - ) - ) - return True - return False - - def get_config(self, lesson: int = None) -> Dict[str, Any]: - """ - Returns reset parameters which correspond to the lesson. - :param lesson: The lesson you want to get the config of. If None, the - current lesson is returned. - :return: The configuration of the reset parameters. - """ - if not self.settings: - return {} - if lesson is None: - lesson = self.lesson_num - lesson = max(0, min(lesson, self.max_lesson_num)) - config = {} - parameters = self.settings.parameters - for key in parameters: - config[key] = parameters[key][lesson] - return config diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index eeb4877b14..19baed9a0f 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -71,7 +71,7 @@ def reset(self, config: Dict = None) -> int: def set_env_parameters(self, config: Dict = None) -> None: """ Sends environment parameter settings to C# via the - EnvironmentParametersSidehannel. + EnvironmentParametersSideChannel. 
:param config: Dict of environment parameter keys and values """ pass diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py new file mode 100644 index 0000000000..2293d5869a --- /dev/null +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -0,0 +1,140 @@ +from typing import Dict, List, Tuple +import numpy as np +import math +from mlagents.trainers.settings import ( + EnvironmentParameterSettings, + CompletionCriteriaSettings, + ParameterRandomizationSettings, +) +from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType + +from mlagents_envs.logging_util import get_logger + +logger = get_logger(__name__) + + +class EnvironmentParameterManager: + def __init__( + self, + settings: Dict[str, EnvironmentParameterSettings], + run_seed: int, + restore: bool, + ): + self._dict_settings = settings + for parameter_name in self._dict_settings.keys(): + initial_lesson = GlobalTrainingStatus.get_parameter_state( + parameter_name, StatusType.LESSON_NUM + ) + if initial_lesson is None or not restore: + GlobalTrainingStatus.set_parameter_state( + parameter_name, StatusType.LESSON_NUM, 0 + ) + self._smoothing_values: Dict[str, float] = {} + for key in self._dict_settings.keys(): + self._smoothing_values[key] = 0.0 + # Update the seeds of the samplers + self._set_sampler_seeds(run_seed) + + def _set_sampler_seeds(self, seed): + offset = 0 + for settings in self._dict_settings.values(): + for lesson in settings.lessons: + if lesson.sampler.seed == -1: + lesson.sampler.seed = seed + offset + offset += 1 + + def get_minimum_reward_buffer_size(self, behavior_name: str) -> int: + result = 1 + for settings in self._dict_settings.values(): + for lesson in settings.lessons: + if lesson.completion_criteria is not None: + if lesson.completion_criteria.behavior == behavior_name: + result = max( + result, lesson.completion_criteria.min_lesson_length + ) + return result + + def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]: + samplers: Dict[str, ParameterRandomizationSettings] = {} + for param_name, settings in self._dict_settings.items(): + lesson_num = GlobalTrainingStatus.get_parameter_state( + param_name, StatusType.LESSON_NUM + ) + lesson = settings.lessons[lesson_num] + samplers[param_name] = lesson.sampler + return samplers + + def get_current_lesson_number(self) -> Dict[str, int]: + result: Dict[str, int] = {} + for parameter_name in self._dict_settings.keys(): + result[parameter_name] = GlobalTrainingStatus.get_parameter_state( + parameter_name, StatusType.LESSON_NUM + ) + return result + + def update_lessons( + self, + trainer_steps: Dict[str, int], + trainer_max_steps: Dict[str, int], + trainer_reward_buffer: Dict[str, List[float]], + ) -> Tuple[bool, bool]: + must_reset = False + updated = False + for param_name, settings in self._dict_settings.items(): + lesson_num = GlobalTrainingStatus.get_parameter_state( + param_name, StatusType.LESSON_NUM + ) + lesson = settings.lessons[lesson_num] + if ( + lesson.completion_criteria is not None + and len(settings.lessons) > lesson_num + ): + behavior_to_consider = lesson.completion_criteria.behavior + must_increment, new_smoothing = self._need_increment( + lesson.completion_criteria, + float(trainer_steps[behavior_to_consider]) + / float(trainer_max_steps[behavior_to_consider]), + trainer_reward_buffer[behavior_to_consider], + self._smoothing_values[param_name], + ) + self._smoothing_values[param_name] = new_smoothing + 
if must_increment: + GlobalTrainingStatus.set_parameter_state( + param_name, StatusType.LESSON_NUM, lesson_num + 1 + ) + logger.info( + f"Parameter '{param_name}' has changed. Now in lesson '{settings.lessons[lesson_num+1].name}'" + ) + updated = True + if lesson.completion_criteria.require_reset: + must_reset = True + return updated, must_reset + + @staticmethod + def _need_increment( + increment_condition: CompletionCriteriaSettings, + progress: float, + reward_buffer: List[float], + smoothing: float, + ) -> Tuple[bool, float]: + # Is the min number of episodes reached + if len(reward_buffer) < increment_condition.min_lesson_length: + return False, smoothing + if ( + increment_condition.measure + == CompletionCriteriaSettings.MeasureType.PROGRESS + ): + if progress > increment_condition.threshold: + return True, smoothing + if increment_condition.measure == CompletionCriteriaSettings.MeasureType.REWARD: + if len(reward_buffer) < 1: + return False, smoothing + measure = np.mean(reward_buffer) + if math.isnan(measure): + return False, smoothing + if increment_condition.signal_smoothing: + measure = 0.25 * smoothing + 0.75 * measure + smoothing = measure + if measure > increment_condition.threshold: + return True, smoothing + return False, smoothing diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 0ef42e37d0..149b1737d9 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -11,7 +11,7 @@ import mlagents_envs from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories from mlagents.trainers.stats import ( TensorboardWriter, @@ -22,7 +22,8 @@ ) from mlagents.trainers.cli_utils import parser from mlagents_envs.environment import UnityEnvironment -from mlagents.trainers.settings import RunOptions +from mlagents.trainers.settings import RunOptions, EnvironmentParameterSettings + from mlagents.trainers.training_status import GlobalTrainingStatus from mlagents_envs.base_env import BaseEnv from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager @@ -128,10 +129,10 @@ def run_training(run_seed: int, options: RunOptions) -> None: env_manager = SubprocessEnvManager( env_factory, engine_config, env_settings.num_envs ) - maybe_meta_curriculum = try_create_meta_curriculum( - options.curriculum, env_manager, restore=checkpoint_settings.resume + maybe_parameter_manager = try_create_param_manager( + options.environment_parameters, run_seed, restore=checkpoint_settings.resume ) - maybe_add_samplers(options.parameter_randomization, env_manager, run_seed) + trainer_factory = TrainerFactory( options.behaviors, write_path, @@ -139,7 +140,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: checkpoint_settings.resume, run_seed, maybe_init_path, - maybe_meta_curriculum, + maybe_parameter_manager, False, ) # Create controller and begin training. 
@@ -147,7 +148,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: trainer_factory, write_path, checkpoint_settings.run_id, - maybe_meta_curriculum, + maybe_parameter_manager, not checkpoint_settings.inference, run_seed, ) @@ -191,33 +192,15 @@ def write_timing_tree(output_dir: str) -> None: ) -def maybe_add_samplers( - sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int -) -> None: - """ - Adds samplers to env if sampler config provided and sets seed if not configured. - :param sampler_config: validated dict of sampler configs. None if not included. - :param env: env manager to pass samplers via reset - :param run_seed: Random seed used for training. - """ - if sampler_config is not None: - # If the seed is not specified in yaml, this will grab the run seed - for offset, v in enumerate(sampler_config.values()): - if v.seed == -1: - v.seed = run_seed + offset - env.set_env_parameters(config=sampler_config) - - -def try_create_meta_curriculum( - curriculum_config: Optional[Dict], env: SubprocessEnvManager, restore: bool = False -) -> Optional[MetaCurriculum]: - if curriculum_config is None or len(curriculum_config) <= 0: +def try_create_param_manager( + config: Optional[Dict[str, EnvironmentParameterSettings]], + run_seed: int, + restore: bool = False, +) -> Optional[EnvironmentParameterManager]: + if config is None: return None else: - meta_curriculum = MetaCurriculum(curriculum_config) - if restore: - meta_curriculum.try_restore_all_curriculum() - return meta_curriculum + return EnvironmentParameterManager(config, run_seed, restore) def create_environment_factory( diff --git a/ml-agents/mlagents/trainers/meta_curriculum.py b/ml-agents/mlagents/trainers/meta_curriculum.py deleted file mode 100644 index 187a9345ff..0000000000 --- a/ml-agents/mlagents/trainers/meta_curriculum.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Contains the MetaCurriculum class.""" - -from typing import Dict, Set -from mlagents.trainers.curriculum import Curriculum -from mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType - -from mlagents_envs.logging_util import get_logger - -logger = get_logger(__name__) - - -class MetaCurriculum: - """A MetaCurriculum holds curricula. Each curriculum is associated to a - particular brain in the environment. - """ - - def __init__(self, curriculum_configs: Dict[str, CurriculumSettings]): - """Initializes a MetaCurriculum object. - - :param curriculum_folder: Dictionary of brain_name to the - Curriculum for each brain. - """ - self._brains_to_curricula: Dict[str, Curriculum] = {} - used_reset_parameters: Set[str] = set() - for brain_name, curriculum_settings in curriculum_configs.items(): - self._brains_to_curricula[brain_name] = Curriculum( - brain_name, curriculum_settings - ) - config_keys: Set[str] = set( - self._brains_to_curricula[brain_name].get_config().keys() - ) - - # Check if any two curricula use the same reset params. - if config_keys & used_reset_parameters: - logger.warning( - "Two or more curricula will " - "attempt to change the same reset " - "parameter. The result will be " - "non-deterministic." 
- ) - - used_reset_parameters.update(config_keys) - - @property - def brains_to_curricula(self): - """A dict from brain_name to the brain's curriculum.""" - return self._brains_to_curricula - - @property - def lesson_nums(self): - """A dict from brain name to the brain's curriculum's lesson number.""" - lesson_nums = {} - for brain_name, curriculum in self.brains_to_curricula.items(): - lesson_nums[brain_name] = curriculum.lesson_num - - return lesson_nums - - @lesson_nums.setter - def lesson_nums(self, lesson_nums): - for brain_name, lesson in lesson_nums.items(): - self.brains_to_curricula[brain_name].lesson_num = lesson - - def _lesson_ready_to_increment( - self, brain_name: str, reward_buff_size: int - ) -> bool: - """Determines whether the curriculum of a specified brain is ready - to attempt an increment. - - Args: - brain_name (str): The name of the brain whose curriculum will be - checked for readiness. - reward_buff_size (int): The size of the reward buffer of the trainer - that corresponds to the specified brain. - - Returns: - Whether the curriculum of the specified brain should attempt to - increment its lesson. - """ - if brain_name not in self.brains_to_curricula: - return False - - return reward_buff_size >= ( - self.brains_to_curricula[brain_name].min_lesson_length - ) - - def increment_lessons(self, measure_vals, reward_buff_sizes=None): - """Attempts to increments all the lessons of all the curricula in this - MetaCurriculum. Note that calling this method does not guarantee the - lesson of a curriculum will increment. The lesson of a curriculum will - only increment if the specified measure threshold defined in the - curriculum has been reached and the minimum number of episodes in the - lesson have been completed. - - Args: - measure_vals (dict): A dict of brain name to measure value. - reward_buff_sizes (dict): A dict of brain names to the size of their - corresponding reward buffers. - - Returns: - A dict from brain name to whether that brain's lesson number was - incremented. - """ - ret = {} - if reward_buff_sizes: - for brain_name, buff_size in reward_buff_sizes.items(): - if self._lesson_ready_to_increment(brain_name, buff_size): - measure_val = measure_vals[brain_name] - ret[brain_name] = self.brains_to_curricula[ - brain_name - ].increment_lesson(measure_val) - else: - for brain_name, measure_val in measure_vals.items(): - ret[brain_name] = self.brains_to_curricula[brain_name].increment_lesson( - measure_val - ) - return ret - - def try_restore_all_curriculum(self): - """ - Tries to restore all the curriculums to what is saved in training_status.json - """ - - for brain_name, curriculum in self.brains_to_curricula.items(): - lesson_num = GlobalTrainingStatus.get_parameter_state( - brain_name, StatusType.LESSON_NUM - ) - if lesson_num is not None: - logger.info( - f"Resuming curriculum for {brain_name} at lesson {lesson_num}." - ) - curriculum.lesson_num = lesson_num - else: - curriculum.lesson_num = 0 - - def get_config(self): - """Get the combined configuration of all curricula in this - MetaCurriculum. - - :return: A dict from parameter to value. 
- """ - config = {} - - for _, curriculum in self.brains_to_curricula.items(): - curr_config = curriculum.get_config() - config.update(curr_config) - - return config diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 6e0069cc39..b0e4b171cc 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -296,7 +296,12 @@ def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: # ENVIRONMENT PARAMETERS ############################################################### @attr.s(auto_attribs=True) -class NextLessonTriggerSettings: +class CompletionCriteriaSettings: + """ + CompletionCriteriaSettings contains the information needed to figure out if the next + lesson must start. + """ + class MeasureType(Enum): PROGRESS: str = "progress" REWARD: str = "reward" @@ -306,34 +311,65 @@ class MeasureType(Enum): min_lesson_length: int = 0 signal_smoothing: bool = True threshold: float = attr.ib(default=0.0) + require_reset: bool = False @threshold.validator def _check_threshold_value(self, attribute, value): - if self.threshold > 1.0: - raise TrainerConfigError( - "Threshold for next lesson cannot be greater than 1." - ) - if self.threshold < 0.0: - raise TrainerConfigError( - "Threshold for next lesson cannot be greater negative." - ) + """ + Verify that the threshold has a value between 0 and 1 when the measure is + PROGRESS + """ + if self.measure == self.MeasureType.PROGRESS: + if self.threshold > 1.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be greater than 1 when the measure is progress." + ) + if self.threshold < 0.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be greater negative when the measure is progress." + ) @attr.s(auto_attribs=True) class Lesson: - next_lesson_trigger: Optional[NextLessonTriggerSettings] + """ + Gathers the data of one lesson for one environment parameter including its name, + the condition that must be fullfiled for the lesson to be completed and a sampler + for the environment parameter. If the completion_criteria is None, then this is + the last lesson in the curriculum. + """ + + completion_criteria: Optional[CompletionCriteriaSettings] sampler: ParameterRandomizationSettings name: str @attr.s(auto_attribs=True) class EnvironmentParameterSettings: + """ + EnvironmentParameterSettings is an ordered list of lessons for one environment + parameter. + """ + lessons: List[Lesson] + @staticmethod + def _check_lesson_chain(lessons, parameter_name): + num_lessons = len(lessons) + for index, lesson in enumerate(lessons): + if index < num_lessons - 1 and lesson.completion_criteria is None: + raise TrainerConfigError( + f"A non-terminal lesson does not have a completion_criteria for {parameter_name}." + ) + @staticmethod def _sampler_from_config( environment_parameter_config: Mapping ) -> Optional[ParameterRandomizationSettings]: + """ + Returns a ParameterRandomizationSettings when the environment_parameter_config + argument corresponds to a sampler and None otherwise. + """ if isinstance(environment_parameter_config, (float, int)): sampler = ConstantSettings(value=float(environment_parameter_config)) return sampler @@ -350,10 +386,11 @@ def _sampler_from_config( return None @staticmethod - def structure(d: Mapping, t: type) -> Any: + def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: """ - Helper method to structure a Dict of EnvironmentParameterSettings class. 
Meant to be registered with - cattr.register_structure_hook() and called with cattr.structure(). + Helper method to structure a Dict of EnvironmentParameterSettings class. Meant + to be registered with cattr.register_structure_hook() and called with + cattr.structure(). """ if not isinstance(d, Mapping): raise TrainerConfigError( @@ -368,7 +405,7 @@ def structure(d: Mapping, t: type) -> Any: d_final[environment_parameter] = EnvironmentParameterSettings( lessons=[ Lesson( - next_lesson_trigger=None, + completion_criteria=None, sampler=maybe_sampler, name=environment_parameter, ) @@ -379,14 +416,14 @@ def structure(d: Mapping, t: type) -> Any: lessons: List[Lesson] = [] for lesson_dict in environment_parameter_config["curriculum"]: # a lesson_dict contains a single lesson with the name of the lesson as key - next_lesson_trigger = None + completion_criteria = None maybe_sampler = None lesson_name = list(lesson_dict.keys())[0] lesson_config = list(lesson_dict.values())[0] - if "next_lesson_trigger" in lesson_config: - next_lesson_trigger = strict_to_cls( - lesson_config["next_lesson_trigger"], - NextLessonTriggerSettings, + if "completion_criteria" in lesson_config: + completion_criteria = strict_to_cls( + lesson_config["completion_criteria"], + CompletionCriteriaSettings, ) if "value" in lesson_config: maybe_sampler = EnvironmentParameterSettings._sampler_from_config( @@ -399,11 +436,14 @@ def structure(d: Mapping, t: type) -> Any: ) lessons.append( Lesson( - next_lesson_trigger=next_lesson_trigger, + completion_criteria=completion_criteria, sampler=maybe_sampler, name=lesson_name, ) ) + EnvironmentParameterSettings._check_lesson_chain( + lessons, environment_parameter + ) d_final[environment_parameter] = EnvironmentParameterSettings( lessons=lessons ) diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index f50fc52747..53685e3892 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -176,9 +176,7 @@ def external_brains(): _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains()) elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS: for k, v in req.payload.items(): - if isinstance(v, float): - env_parameters.set_float_parameter(k, v) - elif isinstance(v, ParameterRandomizationSettings): + if isinstance(v, ParameterRandomizationSettings): v.apply(k, env_parameters) elif req.cmd == EnvironmentCommand.RESET: env.reset() diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index fdb73bca03..fac61b044a 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -24,12 +24,10 @@ merge_gauges, ) from mlagents.trainers.trainer import Trainer -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.trainer_util import TrainerFactory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.agent_processor import AgentManager -from mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType class TrainerController(object): @@ -38,7 +36,7 @@ def __init__( trainer_factory: TrainerFactory, output_path: str, run_id: str, - meta_curriculum: Optional[MetaCurriculum], + param_manager: 
Optional[EnvironmentParameterManager], train: bool, training_seed: int, ): @@ -46,7 +44,8 @@ def __init__( :param output_path: Path to save the model. :param summaries_dir: Folder to save training summaries. :param run_id: The sub-directory name for model and summary statistics - :param meta_curriculum: MetaCurriculum object which stores information about all curricula. + :param param_manager: EnvironmentParameterManager object which stores information about all + environment parameters. :param train: Whether to train model, or only run inference. :param training_seed: Seed to use for Numpy and Tensorflow random number generation. :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. @@ -58,7 +57,7 @@ def __init__( self.logger = get_logger(__name__) self.run_id = run_id self.train_model = train - self.meta_curriculum = meta_curriculum + self.param_manager = param_manager self.ghost_controller = self.trainer_factory.ghost_controller self.trainer_threads: List[threading.Thread] = [] @@ -66,30 +65,6 @@ def __init__( np.random.seed(training_seed) tf.set_random_seed(training_seed) - def _get_measure_vals(self): - brain_names_to_measure_vals = {} - if self.meta_curriculum: - for ( - brain_name, - curriculum, - ) in self.meta_curriculum.brains_to_curricula.items(): - # Skip brains that are in the metacurriculum but no trainer yet. - if brain_name not in self.trainers: - continue - if curriculum.measure == CurriculumSettings.MeasureType.PROGRESS: - measure_val = self.trainers[brain_name].get_step / float( - self.trainers[brain_name].get_max_steps - ) - brain_names_to_measure_vals[brain_name] = measure_val - elif curriculum.measure == CurriculumSettings.MeasureType.REWARD: - measure_val = np.mean(self.trainers[brain_name].reward_buffer) - brain_names_to_measure_vals[brain_name] = measure_val - else: - for brain_name, trainer in self.trainers.items(): - measure_val = np.mean(trainer.reward_buffer) - brain_names_to_measure_vals[brain_name] = measure_val - return brain_names_to_measure_vals - @timed def _save_model(self): """ @@ -135,10 +110,10 @@ def _reset_env(self, env: EnvManager) -> None: A Data structure corresponding to the initial reset state of the environment. """ - new_meta_curriculum_config = ( - self.meta_curriculum.get_config() if self.meta_curriculum else {} + new_config = ( + self.param_manager.get_current_samplers() if self.param_manager else {} ) - env.reset(config=new_meta_curriculum_config) + env.reset(config=new_config) def _not_done_training(self) -> bool: return ( @@ -235,38 +210,35 @@ def start_learning(self, env_manager: EnvManager) -> None: self._save_model() self._export_graph() - def end_trainer_episodes( - self, env: EnvManager, lessons_incremented: Dict[str, bool] - ) -> None: - self._reset_env(env) + def end_trainer_episodes(self) -> None: # Reward buffers reset takes place only for curriculum learning # else no reset. for trainer in self.trainers.values(): trainer.end_episode() - for brain_name, changed in lessons_incremented.items(): - if changed: - self.trainers[brain_name].reward_buffer.clear() def reset_env_if_ready(self, env: EnvManager) -> None: - if self.meta_curriculum: + if self.param_manager: # Get the sizes of the reward buffers. 
- reward_buff_sizes = { - k: len(t.reward_buffer) for (k, t) in self.trainers.items() - } + reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} + curr_step = {k: int(t.step) for (k, t) in self.trainers.items()} + max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} # Attempt to increment the lessons of the brains who # were ready. - lessons_incremented = self.meta_curriculum.increment_lessons( - self._get_measure_vals(), reward_buff_sizes=reward_buff_sizes + updated, param_must_reset = self.param_manager.update_lessons( + curr_step, max_step, reward_buff ) else: - lessons_incremented = {} - # If any lessons were incremented or the environment is - # ready to be reset - meta_curriculum_reset = any(lessons_incremented.values()) + updated, param_must_reset = False, False + if updated: + for trainer in self.trainers.values(): + trainer.reward_buffer.clear() # If ghost trainer swapped teams ghost_controller_reset = self.ghost_controller.should_reset() - if meta_curriculum_reset or ghost_controller_reset: - self.end_trainer_episodes(env, lessons_incremented) + if param_must_reset or ghost_controller_reset: + self._reset_env(env) # This reset also sends the new config to env + self.end_trainer_episodes() + elif updated and self.param_manager: + env.set_env_parameters(self.param_manager.get_current_samplers()) @timed def advance(self, env: EnvManager) -> int: @@ -274,15 +246,15 @@ def advance(self, env: EnvManager) -> int: with hierarchical_timer("env_step"): num_steps = env.advance() - # Report current lesson - if self.meta_curriculum: - for brain_name, curr in self.meta_curriculum.brains_to_curricula.items(): - if brain_name in self.trainers: - self.trainers[brain_name].stats_reporter.set_stat( - "Environment/Lesson", curr.lesson_num - ) - GlobalTrainingStatus.set_parameter_state( - brain_name, StatusType.LESSON_NUM, curr.lesson_num + # Report current lesson for each environment parameter + if self.param_manager: + for ( + param_name, + lesson_number, + ) in self.param_manager.get_current_lesson_number().items(): + for trainer in self.trainers.values(): + trainer.stats_reporter.set_stat( + f"Environment/Lesson/{param_name}", lesson_number ) for trainer in self.trainers.values(): diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 450116e9cf..8ee90c3402 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,8 +1,8 @@ import os -from typing import Dict +from typing import Dict, Optional from mlagents_envs.logging_util import get_logger -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.exception import TrainerConfigError from mlagents.trainers.trainer import Trainer from mlagents.trainers.exception import UnityTrainerException @@ -25,7 +25,7 @@ def __init__( load_model: bool, seed: int, init_path: str = None, - meta_curriculum: MetaCurriculum = None, + param_manager: Optional[EnvironmentParameterManager] = None, multi_gpu: bool = False, ): self.trainer_config = trainer_config @@ -34,7 +34,7 @@ def __init__( self.train_model = train_model self.load_model = load_model self.seed = seed - self.meta_curriculum = meta_curriculum + self.param_manager = param_manager self.multi_gpu = multi_gpu self.ghost_controller = GhostController() @@ -48,7 +48,7 @@ def generate(self, brain_name: str) -> Trainer: self.ghost_controller, 
self.seed, self.init_path, - self.meta_curriculum, + self.param_manager, self.multi_gpu, ) @@ -62,7 +62,7 @@ def initialize_trainer( ghost_controller: GhostController, seed: int, init_path: str = None, - meta_curriculum: MetaCurriculum = None, + param_manager: Optional[EnvironmentParameterManager] = None, multi_gpu: bool = False, ) -> Trainer: """ @@ -78,7 +78,7 @@ def initialize_trainer( :param ghost_controller: The object that coordinates ghost trainers :param seed: The random seed to use :param init_path: Path from which to load model, if different from model_path. - :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer + :param param_manager: Optional EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer :return: """ trainer_artifact_path = os.path.join(output_path, brain_name) @@ -86,16 +86,8 @@ def initialize_trainer( trainer_settings.init_path = os.path.join(init_path, brain_name) min_lesson_length = 1 - if meta_curriculum: - if brain_name in meta_curriculum.brains_to_curricula: - min_lesson_length = meta_curriculum.brains_to_curricula[ - brain_name - ].min_lesson_length - else: - logger.warning( - f"Metacurriculum enabled, but no curriculum for brain {brain_name}. " - f"Brains with curricula: {meta_curriculum.brains_to_curricula.keys()}. " - ) + if param_manager: + min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) trainer: Trainer = None # type: ignore # will be set to one of these, or raise trainer_type = trainer_settings.trainer_type From 99297b911e74512c2fe4032bdf6bdf74e2509393 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 24 Jun 2020 11:41:27 -0700 Subject: [PATCH 04/26] Added the unit tests --- ml-agents/mlagents/trainers/settings.py | 3 +- .../trainers/tests/test_curriculum.py | 77 ------ .../trainers/tests/test_env_param_manager.py | 256 ++++++++++++++++++ .../trainers/tests/test_meta_curriculum.py | 136 ---------- 4 files changed, 258 insertions(+), 214 deletions(-) delete mode 100644 ml-agents/mlagents/trainers/tests/test_curriculum.py create mode 100644 ml-agents/mlagents/trainers/tests/test_env_param_manager.py delete mode 100644 ml-agents/mlagents/trainers/tests/test_meta_curriculum.py diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index b0e4b171cc..65dee375dc 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -370,6 +370,8 @@ def _sampler_from_config( Returns a ParameterRandomizationSettings when the environment_parameter_config argument corresponds to a sampler and None otherwise. """ + if environment_parameter_config is None: + return None if isinstance(environment_parameter_config, (float, int)): sampler = ConstantSettings(value=float(environment_parameter_config)) return sampler @@ -429,7 +431,6 @@ def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: maybe_sampler = EnvironmentParameterSettings._sampler_from_config( lesson_config["value"] ) - print(lesson_config["value"], maybe_sampler) if "value" not in lesson_config or maybe_sampler is None: raise TrainerConfigError( f"Parameter {environment_parameter} in lesson {lesson_name} does not contain a valid value." 
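
As a usage note, here is a minimal sketch of how the structured settings above reach the environment at runtime, assuming the EnvironmentParameterManager and sampler classes introduced in this series and the EnvironmentParametersChannel side channel from mlagents_envs; the helper name send_current_lessons is illustrative and not taken from the repo. The manager exposes one sampler per environment parameter (the sampler of that parameter's current lesson), and each sampler serializes itself over the side channel, with ConstantSettings sending a single float via set_float_parameter.

from mlagents_envs.side_channel.environment_parameters_channel import (
    EnvironmentParametersChannel,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager


# Illustrative helper (not part of the patch series).
def send_current_lessons(
    manager: EnvironmentParameterManager, channel: EnvironmentParametersChannel
) -> None:
    # One sampler per environment parameter, taken from that parameter's current lesson.
    for param_name, sampler in manager.get_current_samplers().items():
        # Each ParameterRandomizationSettings subclass knows how to send itself;
        # ConstantSettings calls channel.set_float_parameter(param_name, value).
        sampler.apply(param_name, channel)
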
diff --git a/ml-agents/mlagents/trainers/tests/test_curriculum.py b/ml-agents/mlagents/trainers/tests/test_curriculum.py deleted file mode 100644 index 2740206924..0000000000 --- a/ml-agents/mlagents/trainers/tests/test_curriculum.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest - -from mlagents.trainers.exception import CurriculumConfigError -from mlagents.trainers.curriculum import Curriculum -from mlagents.trainers.settings import CurriculumSettings - - -dummy_curriculum_config = CurriculumSettings( - measure="reward", - thresholds=[10, 20, 50], - min_lesson_length=3, - signal_smoothing=True, - parameters={ - "param1": [0.7, 0.5, 0.3, 0.1], - "param2": [100, 50, 20, 15], - "param3": [0.2, 0.3, 0.7, 0.9], - }, -) - -bad_curriculum_config = CurriculumSettings( - measure="reward", - thresholds=[10, 20, 50], - min_lesson_length=3, - signal_smoothing=False, - parameters={ - "param1": [0.7, 0.5, 0.3, 0.1], - "param2": [100, 50, 20], - "param3": [0.2, 0.3, 0.7, 0.9], - }, -) - - -@pytest.fixture -def default_reset_parameters(): - return {"param1": 1, "param2": 1, "param3": 1} - - -def test_init_curriculum_happy_path(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - - assert curriculum.brain_name == "TestBrain" - assert curriculum.lesson_num == 0 - assert curriculum.measure == "reward" - - -def test_increment_lesson(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - assert curriculum.lesson_num == 0 - - curriculum.lesson_num = 1 - assert curriculum.lesson_num == 1 - - assert not curriculum.increment_lesson(10) - assert curriculum.lesson_num == 1 - - assert curriculum.increment_lesson(30) - assert curriculum.lesson_num == 2 - - assert not curriculum.increment_lesson(30) - assert curriculum.lesson_num == 2 - - assert curriculum.increment_lesson(10000) - assert curriculum.lesson_num == 3 - - -def test_get_parameters(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - assert curriculum.get_config() == {"param1": 0.7, "param2": 100, "param3": 0.2} - - curriculum.lesson_num = 2 - assert curriculum.get_config() == {"param1": 0.3, "param2": 20, "param3": 0.7} - assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2} - - -def test_load_bad_curriculum_file_raises_error(): - with pytest.raises(CurriculumConfigError): - Curriculum("TestBrain", bad_curriculum_config) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py new file mode 100644 index 0000000000..b4600dca5c --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -0,0 +1,256 @@ +import pytest +import yaml + + +from mlagents.trainers.exception import TrainerConfigError +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +from mlagents.trainers.settings import ( + RunOptions, + UniformSettings, + GaussianSettings, + ConstantSettings, + CompletionCriteriaSettings, +) + + +test_sampler_config_yaml = """ +environment_parameters: + param_1: + sampler_type: uniform + sampler_parameters: + min_value: 0.5 + max_value: 10 +""" + + +def test_sampler_conversion(): + run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml)) + assert run_options.environment_parameters is not None + assert "param_1" in run_options.environment_parameters + lessons = run_options.environment_parameters["param_1"].lessons + assert len(lessons) == 1 + assert lessons[0].completion_criteria is None + assert isinstance(lessons[0].sampler, 
UniformSettings) + assert lessons[0].sampler.min_value == 0.5 + assert lessons[0].sampler.max_value == 10 + + +test_sampler_and_constant_config_yaml = """ +environment_parameters: + param_1: + sampler_type: gaussian + sampler_parameters: + mean: 4 + st_dev: 5 + param_2: 20 +""" + + +def test_sampler_and_constant_conversion(): + run_options = RunOptions.from_dict( + yaml.safe_load(test_sampler_and_constant_config_yaml) + ) + assert "param_1" in run_options.environment_parameters + assert "param_2" in run_options.environment_parameters + lessons_1 = run_options.environment_parameters["param_1"].lessons + lessons_2 = run_options.environment_parameters["param_2"].lessons + # gaussian + assert isinstance(lessons_1[0].sampler, GaussianSettings) + assert lessons_1[0].sampler.mean == 4 + assert lessons_1[0].sampler.st_dev == 5 + # constant + assert isinstance(lessons_2[0].sampler, ConstantSettings) + assert lessons_2[0].sampler.value == 20 + + +test_curriculum_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - FirstLesson: + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - SecondLesson: + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 60 + min_lesson_length: 100 + require_reset: false + value: 2 + - LastLesson: + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 +""" + + +def test_curriculum_conversion(): + run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml)) + assert "param_1" in run_options.environment_parameters + lessons = run_options.environment_parameters["param_1"].lessons + assert len(lessons) == 3 + # First lesson + lesson = lessons[0] + assert lesson.completion_criteria is not None + assert ( + lesson.completion_criteria.measure + == CompletionCriteriaSettings.MeasureType.REWARD + ) + assert lesson.completion_criteria.behavior == "fake_behavior" + assert lesson.completion_criteria.threshold == 30.0 + assert lesson.completion_criteria.min_lesson_length == 100 + assert lesson.completion_criteria.require_reset + assert isinstance(lesson.sampler, ConstantSettings) + assert lesson.sampler.value == 1 + # Second lesson + lesson = lessons[1] + assert lesson.completion_criteria is not None + assert ( + lesson.completion_criteria.measure + == CompletionCriteriaSettings.MeasureType.REWARD + ) + assert lesson.completion_criteria.behavior == "fake_behavior" + assert lesson.completion_criteria.threshold == 60.0 + assert lesson.completion_criteria.min_lesson_length == 100 + assert not lesson.completion_criteria.require_reset + assert isinstance(lesson.sampler, ConstantSettings) + assert lesson.sampler.value == 2 + # Last lesson + lesson = lessons[2] + assert lesson.completion_criteria is None + assert isinstance(lesson.sampler, UniformSettings) + assert lesson.sampler.min_value == 1 + assert lesson.sampler.max_value == 3 + + +test_bad_curriculum_no_competion_criteria_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - FirstLesson: + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - SecondLesson: + value: 2 + - LastLesson: + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 +""" + + +def test_curriculum_raises_no_completion_criteria_conversion(): + with pytest.raises(TrainerConfigError): + RunOptions.from_dict( + 
yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml) + ) + + +test_everything_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - FirstLesson: + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - SecondLesson: + completion_criteria: + measure: progress + behavior: fake_behavior + threshold: 0.5 + min_lesson_length: 100 + require_reset: false + value: 2 + - LastLesson: + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 + param_2: + sampler_type: gaussian + sampler_parameters: + mean: 4 + st_dev: 5 + param_3: 20 +""" + + +def test_create_manager(): + run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml)) + param_manager = EnvironmentParameterManager( + run_options.environment_parameters, 1337, False + ) + assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100 + assert param_manager.get_current_lesson_number() == { + "param_1": 0, + "param_2": 0, + "param_3": 0, + } + assert param_manager.get_current_samplers() == { + "param_1": ConstantSettings(seed=1337, value=1), + "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), + "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), + } + # Not enough episodes completed + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 99}, + ) == (False, False) + # Not enough episodes reward + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1] * 101}, + ) == (False, False) + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + ) == (True, True) + assert param_manager.get_current_lesson_number() == { + "param_1": 1, + "param_2": 0, + "param_3": 0, + } + param_manager_2 = EnvironmentParameterManager( + run_options.environment_parameters, 1337, restore=True + ) + # The use of global status should make it so that the lesson numbers are maintained + assert param_manager_2.get_current_lesson_number() == { + "param_1": 1, + "param_2": 0, + "param_3": 0, + } + # No reset required + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 700}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [0] * 101}, + ) == (True, False) + assert param_manager.get_current_samplers() == { + "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), + "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), + "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), + } diff --git a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py deleted file mode 100644 index 6bff34f841..0000000000 --- a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py +++ /dev/null @@ -1,136 +0,0 @@ -import pytest -from unittest.mock import patch, Mock, call -import yaml -import cattr - -from mlagents.trainers.meta_curriculum import MetaCurriculum - -from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment -from mlagents.trainers.tests.test_simple_rl import ( - _check_environment_trains, - BRAIN_NAME, - PPO_CONFIG, -) -from mlagents.trainers.tests.test_curriculum import 
dummy_curriculum_config -from mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import StatusType - - -@pytest.fixture -def measure_vals(): - return {"Brain1": 0.2, "Brain2": 0.3} - - -@pytest.fixture -def reward_buff_sizes(): - return {"Brain1": 7, "Brain2": 8} - - -def test_convert_from_dict(): - config = yaml.safe_load( - """ - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - param1: [0.0, 4.0, 6.0, 8.0] - """ - ) - should_be_config = CurriculumSettings( - thresholds=[0.1, 0.3, 0.5], - min_lesson_length=100, - signal_smoothing=True, - measure=CurriculumSettings.MeasureType.PROGRESS, - parameters={"param1": [0.0, 4.0, 6.0, 8.0]}, - ) - assert cattr.structure(config, CurriculumSettings) == should_be_config - - -def test_curriculum_config(param_name="test_param1", min_lesson_length=100): - return CurriculumSettings( - thresholds=[0.1, 0.3, 0.5], - min_lesson_length=min_lesson_length, - parameters={f"{param_name}": [0.0, 4.0, 6.0, 8.0]}, - ) - - -test_meta_curriculum_config = { - "Brain1": test_curriculum_config("test_param1"), - "Brain2": test_curriculum_config("test_param2"), -} - - -def test_set_lesson_nums(): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.lesson_nums = {"Brain1": 1, "Brain2": 3} - - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 1 - assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 3 - - -def test_increment_lessons(measure_vals): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.brains_to_curricula["Brain1"] = Mock() - meta_curriculum.brains_to_curricula["Brain2"] = Mock() - - meta_curriculum.increment_lessons(measure_vals) - - meta_curriculum.brains_to_curricula["Brain1"].increment_lesson.assert_called_with( - 0.2 - ) - meta_curriculum.brains_to_curricula["Brain2"].increment_lesson.assert_called_with( - 0.3 - ) - - -@patch("mlagents.trainers.curriculum.Curriculum") -@patch("mlagents.trainers.curriculum.Curriculum") -def test_increment_lessons_with_reward_buff_sizes( - curriculum_a, curriculum_b, measure_vals, reward_buff_sizes -): - curriculum_a.min_lesson_length = 5 - curriculum_b.min_lesson_length = 10 - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.brains_to_curricula["Brain1"] = curriculum_a - meta_curriculum.brains_to_curricula["Brain2"] = curriculum_b - - meta_curriculum.increment_lessons(measure_vals, reward_buff_sizes=reward_buff_sizes) - - curriculum_a.increment_lesson.assert_called_with(0.2) - curriculum_b.increment_lesson.assert_not_called() - - -@patch("mlagents.trainers.meta_curriculum.GlobalTrainingStatus") -def test_restore_curriculums(mock_trainingstatus): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - # Test restore to value - mock_trainingstatus.get_parameter_state.return_value = 2 - meta_curriculum.try_restore_all_curriculum() - mock_trainingstatus.get_parameter_state.assert_has_calls( - [call("Brain1", StatusType.LESSON_NUM), call("Brain2", StatusType.LESSON_NUM)], - any_order=True, - ) - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 2 - assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 2 - - # Test restore to None - mock_trainingstatus.get_parameter_state.return_value = None - meta_curriculum.try_restore_all_curriculum() - - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 0 - assert 
meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 0 - - -def test_get_config(): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - assert meta_curriculum.get_config() == {"test_param1": 0.0, "test_param2": 0.0} - - -@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"]) -def test_simple_metacurriculum(curriculum_brain_name): - env = SimpleEnvironment([BRAIN_NAME], use_discrete=False) - mc = MetaCurriculum({curriculum_brain_name: dummy_curriculum_config}) - _check_environment_trains( - env, {BRAIN_NAME: PPO_CONFIG}, meta_curriculum=mc, success_threshold=None - ) From 2d7feac2dc79460356940bfe100b612a7401fdc2 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 24 Jun 2020 14:06:10 -0700 Subject: [PATCH 05/26] Update to Upgrade for Updates --- ml-agents/mlagents/trainers/upgrade_config.py | 102 +++++++++++++++--- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/ml-agents/mlagents/trainers/upgrade_config.py b/ml-agents/mlagents/trainers/upgrade_config.py index 4263e6cf1f..7c958e882f 100644 --- a/ml-agents/mlagents/trainers/upgrade_config.py +++ b/ml-agents/mlagents/trainers/upgrade_config.py @@ -5,7 +5,7 @@ import attr import cattr import yaml -from typing import Dict, Any +from typing import Dict, Any, Optional import argparse from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType from mlagents.trainers.cli_utils import load_config @@ -99,13 +99,60 @@ def convert_samplers(old_sampler_config: Dict[str, Any]) -> Dict[str, Any]: return new_sampler_config +def convert_samplers_and_curriculum( + parameter_dict: Dict[str, Any], curriculum: Dict[str, Any] +) -> Dict[str, Any]: + if "resampling-interval" in parameter_dict: + parameter_dict.pop("resampling-interval") + param_set = set(parameter_dict.keys()) + for behavior_name, behavior_dict in curriculum.items(): + measure = behavior_dict["measure"] + min_lesson_length = behavior_dict.get("min_lesson_length", 1) + signal_smoothing = behavior_dict.get("signal_smoothing", False) + thresholds = behavior_dict["thresholds"] + num_lessons = len(thresholds) + 1 + parameters = behavior_dict["parameters"] + for param_name in parameters.keys(): + if param_name in param_set: + print( + f"The parameter {param_name} has both a sampler and a curriculum. 
Will ignore curriculum" + ) + else: + param_set.add(param_name) + parameter_dict[param_name] = {"curriculum": []} + for lesson_index in range(num_lessons - 1): + parameter_dict[param_name]["curriculum"].append( + { + f"Lesson{lesson_index}": { + "completion_criteria": { + "measure": measure, + "behavior": behavior_name, + "signal_smoothing": signal_smoothing, + "min_lesson_length": min_lesson_length, + "threshold": thresholds[lesson_index], + }, + "value": parameters[param_name][lesson_index], + } + } + ) + lesson_index += 1 # This is the last lesson + parameter_dict[param_name]["curriculum"].append( + { + f"Lesson{lesson_index}": { + "value": parameters[param_name][lesson_index] + } + } + ) + return parameter_dict + + def parse_args(): argparser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) argparser.add_argument( "trainer_config_path", - help="Path to old format (<=0.16.X) trainer configuration YAML.", + help="Path to old format (<=0.18.X) trainer configuration YAML.", ) argparser.add_argument( "--curriculum", @@ -124,6 +171,40 @@ def parse_args(): return args +def convert( + config: Dict[str, Any], + old_curriculum: Optional[Dict[str, Any]], + old_param_random: Optional[Dict[str, Any]], +) -> Dict[str, Any]: + if "behaviors" not in config: + print("Config file format version : version <= 0.16.X") + behavior_config_dict = convert_behaviors(config) + full_config = {"behaviors": behavior_config_dict} + + # Convert curriculum and sampler. note that we don't validate these; if it was correct + # before it should be correct now. + if old_curriculum is not None: + full_config["curriculum"] = old_curriculum + + if old_param_random is not None: + sampler_config_dict = convert_samplers(old_param_random) + full_config["parameter_randomization"] = sampler_config_dict + + # Convert config to dict + config = cattr.unstructure(full_config) + if "curriculum" in config or "parameter_randomization" in config: + print("Config file format version : 0.16.X < version <= 0.18.X") + full_config = {"behaviors": config["behaviors"]} + + full_config["environment_parameters"] = convert_samplers_and_curriculum( + config.get("parameter_randomization", {}), config.get("curriculum", {}) + ) + + # Convert config to dict + config = cattr.unstructure(full_config) + return config + + def main() -> None: args = parse_args() print( @@ -131,23 +212,14 @@ def main() -> None: ) old_config = load_config(args.trainer_config_path) - behavior_config_dict = convert_behaviors(old_config) - full_config = {"behaviors": behavior_config_dict} - - # Convert curriculum and sampler. note that we don't validate these; if it was correct - # before it should be correct now. 
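+    # The conversion logic now lives in convert() above; main() just loads the
+    # raw YAML dicts, converts them, and writes out the result.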
+ curriculum_config_dict = None + old_sampler_config_dict = None if args.curriculum is not None: curriculum_config_dict = load_config(args.curriculum) - full_config["curriculum"] = curriculum_config_dict - if args.sampler is not None: old_sampler_config_dict = load_config(args.sampler) - sampler_config_dict = convert_samplers(old_sampler_config_dict) - full_config["parameter_randomization"] = sampler_config_dict - - # Convert config to dict - unstructed_config = cattr.unstructure(full_config) - unstructed_config = remove_nones(unstructed_config) + new_config = convert(old_config, curriculum_config_dict, old_sampler_config_dict) + unstructed_config = remove_nones(new_config) write_to_yaml_file(unstructed_config, args.output_config_path) From e3a14b2754a3c57ee6e26846f43c3aa79d7986a4 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 24 Jun 2020 14:08:25 -0700 Subject: [PATCH 06/26] fixing the tests --- .../trainers/tests/test_config_conversion.py | 86 ++++++++++--------- .../mlagents/trainers/tests/test_learn.py | 28 ------ .../mlagents/trainers/tests/test_settings.py | 83 ++++++++++++++---- .../mlagents/trainers/tests/test_simple_rl.py | 6 +- .../trainers/tests/test_trainer_controller.py | 4 +- 5 files changed, 113 insertions(+), 94 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_config_conversion.py b/ml-agents/mlagents/trainers/tests/test_config_conversion.py index 49a1489f12..721fb33361 100644 --- a/ml-agents/mlagents/trainers/tests/test_config_conversion.py +++ b/ml-agents/mlagents/trainers/tests/test_config_conversion.py @@ -1,9 +1,7 @@ import yaml import pytest -from unittest import mock -from argparse import Namespace -from mlagents.trainers.upgrade_config import convert_behaviors, main, remove_nones +from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert from mlagents.trainers.settings import ( TrainerType, PPOSettings, @@ -125,6 +123,41 @@ encoding_size: 128 """ +CURRICULUM = """ + + BigWallJump: + measure: progress + thresholds: [0.1, 0.3, 0.5] + min_lesson_length: 100 + signal_smoothing: true + parameters: + big_wall_min_height: [0.0, 4.0, 6.0, 8.0] + big_wall_max_height: [4.0, 7.0, 8.0, 8.0] + SmallWallJump: + measure: progress + thresholds: [0.1, 0.3, 0.5] + min_lesson_length: 100 + signal_smoothing: true + parameters: + small_wall_height: [1.5, 2.0, 2.5, 4.0] + """ + +RANDOMIZATION = """ + resampling-interval: 5000 + mass: + sampler-type: uniform + min_value: 0.5 + max_value: 10 + gravity: + sampler-type: uniform + min_value: 7 + max_value: 12 + scale: + sampler-type: uniform + min_value: 0.75 + max_value: 3 + """ + @pytest.mark.parametrize("use_recurrent", [True, False]) @pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC]) @@ -152,45 +185,14 @@ def test_convert_behaviors(trainer_type, use_recurrent): assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals -@mock.patch("mlagents.trainers.upgrade_config.convert_samplers") -@mock.patch("mlagents.trainers.upgrade_config.convert_behaviors") -@mock.patch("mlagents.trainers.upgrade_config.remove_nones") -@mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file") -@mock.patch("mlagents.trainers.upgrade_config.parse_args") -@mock.patch("mlagents.trainers.upgrade_config.load_config") -def test_main( - mock_load, - mock_parse, - yaml_write_mock, - remove_none_mock, - mock_convert_behaviors, - mock_convert_samplers, -): - test_output_file = "test.yaml" - mock_load.side_effect = [ - yaml.safe_load(PPO_CONFIG), - "test_curriculum_config", - 
"test_sampler_config", - ] - mock_args = Namespace( - trainer_config_path="mock", - output_config_path=test_output_file, - curriculum="test", - sampler="test", - ) - mock_parse.return_value = mock_args - mock_convert_behaviors.return_value = "test_converted_config" - mock_convert_samplers.return_value = "test_converted_sampler_config" - dict_without_nones = mock.Mock(name="nonones") - remove_none_mock.return_value = dict_without_nones - - main() - saved_dict = remove_none_mock.call_args[0][0] - # Check that the output of the remove_none call is here - yaml_write_mock.assert_called_with(dict_without_nones, test_output_file) - assert saved_dict["behaviors"] == "test_converted_config" - assert saved_dict["curriculum"] == "test_curriculum_config" - assert saved_dict["parameter_randomization"] == "test_converted_sampler_config" +def test_convert(): + old_behaviors = yaml.safe_load(PPO_CONFIG) + old_curriculum = yaml.safe_load(CURRICULUM) + old_sampler = yaml.safe_load(RANDOMIZATION) + config = convert(old_behaviors, old_curriculum, old_sampler) + assert BRAIN_NAME in config["behaviors"] + assert "big_wall_min_height" in config["environment_parameters"] + assert "gravity" in config["environment_parameters"] def test_remove_nones(): diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 167fe157e3..4bcc9fc460 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -7,7 +7,6 @@ from mlagents.trainers.cli_utils import DetectDefault from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter -from mlagents.trainers.settings import UniformSettings def basic_options(extra_args=None): @@ -44,22 +43,6 @@ def basic_options(extra_args=None): debug: false """ -MOCK_SAMPLER_CURRICULUM_YAML = """ - parameter_randomization: - sampler1: - sampler_type: uniform - sampler_parameters: - min_value: 0.2 - - curriculum: - behavior1: - parameters: - foo: [0.2, 0.5] - behavior2: - parameters: - foo: [0.2, 0.5] - """ - @patch("mlagents.trainers.learn.write_timing_tree") @patch("mlagents.trainers.learn.write_run_options") @@ -121,7 +104,6 @@ def test_commandline_args(mock_file): opt = parse_command_line(["mytrainerpath"]) assert opt.behaviors == {} assert opt.env_settings.env_path is None - assert opt.parameter_randomization is None assert opt.checkpoint_settings.resume is False assert opt.checkpoint_settings.inference is False assert opt.checkpoint_settings.run_id == "ppo" @@ -151,7 +133,6 @@ def test_commandline_args(mock_file): opt = parse_command_line(full_args) assert opt.behaviors == {} assert opt.env_settings.env_path == "./myenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id == "myawesomerun" assert opt.checkpoint_settings.initialize_from == "testdir" assert opt.env_settings.seed == 7890 @@ -170,7 +151,6 @@ def test_yaml_args(mock_file): opt = parse_command_line(["mytrainerpath"]) assert opt.behaviors == {} assert opt.env_settings.env_path == "./oldenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id == "uselessrun" assert opt.checkpoint_settings.initialize_from == "notuselessrun" assert opt.env_settings.seed == 9870 @@ -197,7 +177,6 @@ def test_yaml_args(mock_file): opt = parse_command_line(full_args) assert opt.behaviors == {} assert opt.env_settings.env_path == "./myenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id 
== "myawesomerun" assert opt.env_settings.seed == 7890 assert opt.env_settings.base_port == 4004 @@ -208,13 +187,6 @@ def test_yaml_args(mock_file): assert opt.checkpoint_settings.resume is True -@patch("builtins.open", new_callable=mock_open, read_data=MOCK_SAMPLER_CURRICULUM_YAML) -def test_sampler_configs(mock_file): - opt = parse_command_line(["mytrainerpath"]) - assert isinstance(opt.parameter_randomization["sampler1"], UniformSettings) - assert len(opt.curriculum.keys()) == 2 - - @patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML) def test_env_args(mock_file): full_args = [ diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 19f8e9ecbb..6cdcd9440a 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -12,7 +12,8 @@ RewardSignalType, RewardSignalSettings, CuriositySettings, - ParameterRandomizationSettings, + EnvironmentParameterSettings, + ConstantSettings, UniformSettings, GaussianSettings, MultiRangeUniformSettings, @@ -164,11 +165,11 @@ def test_memory_settings_validation(): NetworkSettings.MemorySettings(sequence_length=128, memory_size=0) -def test_parameter_randomization_structure(): +def test_env_parameter_structure(): """ - Tests the ParameterRandomizationSettings structure method and all validators. + Tests the EnvironmentParameterSettings structure method and all validators. """ - parameter_randomization_dict = { + env_params_dict = { "mass": { "sampler_type": "uniform", "sampler_parameters": {"min_value": 1.0, "max_value": 2.0}, @@ -181,14 +182,36 @@ def test_parameter_randomization_structure(): "sampler_type": "multirangeuniform", "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0, 4.0]]}, }, + "gravity": 1, + "wall_height": { + "curriculum": [ + { + "Lesson1": { + "completion_criteria": { + "measure": "reward", + "behavior": "fake_behavior", + "threshold": 10, + }, + "value": 1, + } + }, + {"Lesson2": {"value": 4}}, + ] + }, } - parameter_randomization_distributions = ParameterRandomizationSettings.structure( - parameter_randomization_dict, Dict[str, ParameterRandomizationSettings] + env_param_settings = EnvironmentParameterSettings.structure( + env_params_dict, Dict[str, EnvironmentParameterSettings] + ) + assert isinstance(env_param_settings["mass"].lessons[0].sampler, UniformSettings) + assert isinstance(env_param_settings["scale"].lessons[0].sampler, GaussianSettings) + assert isinstance( + env_param_settings["length"].lessons[0].sampler, MultiRangeUniformSettings + ) + assert isinstance( + env_param_settings["wall_height"].lessons[0].sampler, ConstantSettings ) - assert isinstance(parameter_randomization_distributions["mass"], UniformSettings) - assert isinstance(parameter_randomization_distributions["scale"], GaussianSettings) assert isinstance( - parameter_randomization_distributions["length"], MultiRangeUniformSettings + env_param_settings["wall_height"].lessons[1].sampler, ConstantSettings ) # Check invalid distribution type @@ -199,8 +222,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(ValueError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check min less than max in uniform @@ -211,8 +234,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - 
ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check min less than max in multirange @@ -223,8 +246,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check multirange has valid intervals @@ -235,12 +258,34 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check non-Dict input with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - "notadict", Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + "notadict", Dict[str, EnvironmentParameterSettings] + ) + + invalid_curriculum_dict = { + "wall_height": { + "curriculum": [ + { + "Lesson1": { + "completion_criteria": { + "measure": "progress", + "behavior": "fake_behavior", + "threshold": 10, + }, # > 1 is too large + "value": 1, + }, + "Lesson2": {"value": 4}, + } + ] + } + } + with pytest.raises(TrainerConfigError): + EnvironmentParameterSettings.structure( + invalid_curriculum_dict, Dict[str, EnvironmentParameterSettings] ) diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 67d3c66617..ca8e0c852c 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -107,7 +107,7 @@ def _check_environment_trains( env, trainer_config, reward_processor=default_reward_processor, - meta_curriculum=None, + env_parameter_manager=None, success_threshold=0.9, env_manager=None, ): @@ -126,7 +126,7 @@ def _check_environment_trains( train_model=True, load_model=False, seed=seed, - meta_curriculum=meta_curriculum, + param_manager=env_parameter_manager, multi_gpu=False, ) @@ -134,7 +134,7 @@ def _check_environment_trains( trainer_factory=trainer_factory, output_path=dir, run_id=run_id, - meta_curriculum=meta_curriculum, + param_manager=env_parameter_manager, train=True, training_seed=seed, ) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index 8a0280dc4b..ac02f0323b 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -14,7 +14,7 @@ def basic_trainer_controller(): trainer_factory=trainer_factory_mock, output_path="test_model_path", run_id="test_run_id", - meta_curriculum=None, + param_manager=None, train=True, training_seed=99, ) @@ -30,7 +30,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): trainer_factory=trainer_factory_mock, output_path="", run_id="1", - meta_curriculum=None, + param_manager=None, train=True, training_seed=seed, ) From e33996b3bea69525c56011e044bd67544d603197 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 24 Jun 2020 14:12:28 -0700 Subject: [PATCH 07/26] Upgraded the config files --- config/ppo/3DBall_randomize.yaml | 33 
+++------ config/ppo/WallJump_curriculum.yaml | 102 +++++++++++++++++++++++----- 2 files changed, 96 insertions(+), 39 deletions(-) diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index e3eb5e453c..fd18b3aeeb 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -24,27 +24,16 @@ behaviors: time_horizon: 1000 summary_freq: 12000 threaded: true - environment_parameters: mass: - sampler_type: uniform - sampler_parameters: - min_value: 0.5 - max_value: 10 - scale: - curriculum: - - FirstLessonForScale: - completion_criteria: - measure: reward - behavior: 3DBall - threshold: 30 - min_lesson_length: 100 - require_reset: true - value: 1 - - SecondLessonForScale: - value: - sampler_type: uniform - sampler_parameters: - min_value: 0.75 - max_value: 3 - + sampler-type: uniform + min_value: 0.5 + max_value: 10 + gravity: + sampler-type: uniform + min_value: 7 + max_value: 12 + scale: + sampler-type: uniform + min_value: 0.75 + max_value: 3 diff --git a/config/ppo/WallJump_curriculum.yaml b/config/ppo/WallJump_curriculum.yaml index 93a8813ae7..db0b2bdc35 100644 --- a/config/ppo/WallJump_curriculum.yaml +++ b/config/ppo/WallJump_curriculum.yaml @@ -49,20 +49,88 @@ behaviors: time_horizon: 128 summary_freq: 20000 threaded: true - -curriculum: - BigWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - big_wall_min_height: [0.0, 4.0, 6.0, 8.0] - big_wall_max_height: [4.0, 7.0, 8.0, 8.0] - SmallWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - small_wall_height: [1.5, 2.0, 2.5, 4.0] +environment_parameters: + big_wall_min_height: + curriculum: + - Lesson0: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: 0.0 + - Lesson1: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.3 + value: 4.0 + - Lesson2: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.5 + value: 6.0 + - Lesson3: + value: 8.0 + big_wall_max_height: + curriculum: + - Lesson0: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: 4.0 + - Lesson1: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.3 + value: 7.0 + - Lesson2: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.5 + value: 8.0 + - Lesson3: + value: 8.0 + small_wall_height: + curriculum: + - Lesson0: + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: 1.5 + - Lesson1: + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.3 + value: 2.0 + - Lesson2: + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.5 + value: 2.5 + - Lesson3: + value: 4.0 From c9b0214aed6f407d6b956936eea0043c722b8eeb Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 24 Jun 2020 14:58:35 -0700 Subject: [PATCH 08/26] Fixes --- 
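A quick sketch of the two sampler layouts involved here (values borrowed from the
3DBall config, for illustration only). The old flat form that upgrade_config.py now
has to detect:

    mass:
      sampler-type: uniform
      min_value: 0.5
      max_value: 10

and the nested form that 3DBall_randomize.yaml is moved back to in this patch:

    mass:
      sampler_type: uniform
      sampler_parameters:
        min_value: 0.5
        max_value: 10
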
config/ppo/3DBall_randomize.yaml | 18 +++++----- .../trainers/environment_parameter_manager.py | 36 ++++++++++--------- ml-agents/mlagents/trainers/upgrade_config.py | 29 +++++++++++++-- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index fd18b3aeeb..31f472c541 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -26,14 +26,12 @@ behaviors: threaded: true environment_parameters: mass: - sampler-type: uniform - min_value: 0.5 - max_value: 10 - gravity: - sampler-type: uniform - min_value: 7 - max_value: 12 + sampler_type: uniform + sampler_parameters: + min_value: 0.5 + max_value: 10 scale: - sampler-type: uniform - min_value: 0.75 - max_value: 3 + sampler_type: uniform + sampler_parameters: + min_value: 0.75 + max_value: 3 diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 2293d5869a..9a1120c0be 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -90,24 +90,26 @@ def update_lessons( and len(settings.lessons) > lesson_num ): behavior_to_consider = lesson.completion_criteria.behavior - must_increment, new_smoothing = self._need_increment( - lesson.completion_criteria, - float(trainer_steps[behavior_to_consider]) - / float(trainer_max_steps[behavior_to_consider]), - trainer_reward_buffer[behavior_to_consider], - self._smoothing_values[param_name], - ) - self._smoothing_values[param_name] = new_smoothing - if must_increment: - GlobalTrainingStatus.set_parameter_state( - param_name, StatusType.LESSON_NUM, lesson_num + 1 - ) - logger.info( - f"Parameter '{param_name}' has changed. Now in lesson '{settings.lessons[lesson_num+1].name}'" + if behavior_to_consider in trainer_steps: + must_increment, new_smoothing = self._need_increment( + lesson.completion_criteria, + float(trainer_steps[behavior_to_consider]) + / float(trainer_max_steps[behavior_to_consider]), + trainer_reward_buffer[behavior_to_consider], + self._smoothing_values[param_name], ) - updated = True - if lesson.completion_criteria.require_reset: - must_reset = True + self._smoothing_values[param_name] = new_smoothing + if must_increment: + GlobalTrainingStatus.set_parameter_state( + param_name, StatusType.LESSON_NUM, lesson_num + 1 + ) + new_lesson_name = settings.lessons[lesson_num + 1].name + logger.info( + f"Parameter '{param_name}' has changed. 
Now in lesson '{new_lesson_name}'" + ) + updated = True + if lesson.completion_criteria.require_reset: + must_reset = True return updated, must_reset @staticmethod diff --git a/ml-agents/mlagents/trainers/upgrade_config.py b/ml-agents/mlagents/trainers/upgrade_config.py index 7c958e882f..e1c8a05ad7 100644 --- a/ml-agents/mlagents/trainers/upgrade_config.py +++ b/ml-agents/mlagents/trainers/upgrade_config.py @@ -102,8 +102,20 @@ def convert_samplers(old_sampler_config: Dict[str, Any]) -> Dict[str, Any]: def convert_samplers_and_curriculum( parameter_dict: Dict[str, Any], curriculum: Dict[str, Any] ) -> Dict[str, Any]: - if "resampling-interval" in parameter_dict: - parameter_dict.pop("resampling-interval") + for key, sampler in parameter_dict.items(): + if "sampler_parameters" not in sampler: + parameter_dict[key]["sampler_parameters"] = {} + for argument in [ + "seed", + "min_value", + "max_value", + "mean", + "st_dev", + "intervals", + ]: + if argument in sampler: + parameter_dict[key]["sampler_parameters"][argument] = sampler[argument] + parameter_dict[key].pop(argument) param_set = set(parameter_dict.keys()) for behavior_name, behavior_dict in curriculum.items(): measure = behavior_dict["measure"] @@ -196,8 +208,19 @@ def convert( print("Config file format version : 0.16.X < version <= 0.18.X") full_config = {"behaviors": config["behaviors"]} + param_randomization = config.get("parameter_randomization", {}) + if "resampling-interval" in param_randomization: + param_randomization.pop("resampling-interval") + if len(param_randomization) > 0: + # check if we use the old format sampler-type vs sampler_type + if ( + "sampler-type" + in param_randomization[list(param_randomization.keys())[0]] + ): + param_randomization = convert_samplers(param_randomization) + full_config["environment_parameters"] = convert_samplers_and_curriculum( - config.get("parameter_randomization", {}), config.get("curriculum", {}) + param_randomization, config.get("curriculum", {}) ) # Convert config to dict From b1e01ad8fcf6fc8fdc708d6d4993ccf18749bf41 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 25 Jun 2020 11:26:47 -0700 Subject: [PATCH 09/26] Additional error catching --- ml-agents/mlagents/trainers/settings.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 65dee375dc..cfa1687142 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -403,6 +403,25 @@ def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: maybe_sampler = EnvironmentParameterSettings._sampler_from_config( environment_parameter_config ) + if isinstance(environment_parameter_config, dict): + if ( + ( + maybe_sampler is not None + and "curriculum" in environment_parameter_config + ) + or ( + maybe_sampler is not None + and "value" in environment_parameter_config + ) + or ( + "value" in environment_parameter_config + and "curriculum" in environment_parameter_config + ) + ): + raise TrainerConfigError( + f"Parameter {environment_parameter} can either be curriculum, " + "a constant value or a sampler. Not a combination of these." 
+ ) if maybe_sampler is not None: d_final[environment_parameter] = EnvironmentParameterSettings( lessons=[ From 13d52a0a8bcae39add7a1072120e3bd0b6e4578b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 25 Jun 2020 12:18:42 -0700 Subject: [PATCH 10/26] addressing some comments --- .../trainers/environment_parameter_manager.py | 42 ++-------- ml-agents/mlagents/trainers/settings.py | 83 +++++++++++++------ 2 files changed, 64 insertions(+), 61 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 9a1120c0be..c54191fccf 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -1,11 +1,10 @@ from typing import Dict, List, Tuple -import numpy as np -import math from mlagents.trainers.settings import ( EnvironmentParameterSettings, CompletionCriteriaSettings, ParameterRandomizationSettings, ) +from collections import defaultdict from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType from mlagents_envs.logging_util import get_logger @@ -29,9 +28,9 @@ def __init__( GlobalTrainingStatus.set_parameter_state( parameter_name, StatusType.LESSON_NUM, 0 ) - self._smoothing_values: Dict[str, float] = {} + self._smoothed_values: Dict[str, float] = defaultdict(float) for key in self._dict_settings.keys(): - self._smoothing_values[key] = 0.0 + self._smoothed_values[key] = 0.0 # Update the seeds of the samplers self._set_sampler_seeds(run_seed) @@ -91,14 +90,14 @@ def update_lessons( ): behavior_to_consider = lesson.completion_criteria.behavior if behavior_to_consider in trainer_steps: - must_increment, new_smoothing = self._need_increment( + must_increment, new_smoothing = CompletionCriteriaSettings.need_increment( lesson.completion_criteria, float(trainer_steps[behavior_to_consider]) / float(trainer_max_steps[behavior_to_consider]), trainer_reward_buffer[behavior_to_consider], - self._smoothing_values[param_name], + self._smoothed_values[param_name], ) - self._smoothing_values[param_name] = new_smoothing + self._smoothed_values[param_name] = new_smoothing if must_increment: GlobalTrainingStatus.set_parameter_state( param_name, StatusType.LESSON_NUM, lesson_num + 1 @@ -111,32 +110,3 @@ def update_lessons( if lesson.completion_criteria.require_reset: must_reset = True return updated, must_reset - - @staticmethod - def _need_increment( - increment_condition: CompletionCriteriaSettings, - progress: float, - reward_buffer: List[float], - smoothing: float, - ) -> Tuple[bool, float]: - # Is the min number of episodes reached - if len(reward_buffer) < increment_condition.min_lesson_length: - return False, smoothing - if ( - increment_condition.measure - == CompletionCriteriaSettings.MeasureType.PROGRESS - ): - if progress > increment_condition.threshold: - return True, smoothing - if increment_condition.measure == CompletionCriteriaSettings.MeasureType.REWARD: - if len(reward_buffer) < 1: - return False, smoothing - measure = np.mean(reward_buffer) - if math.isnan(measure): - return False, smoothing - if increment_condition.signal_smoothing: - measure = 0.25 * smoothing + 0.75 * measure - smoothing = measure - if measure > increment_condition.threshold: - return True, smoothing - return False, smoothing diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index cfa1687142..7b300327c0 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ 
b/ml-agents/mlagents/trainers/settings.py @@ -5,6 +5,8 @@ import collections import argparse import abc +import numpy as np +import math from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -329,6 +331,35 @@ def _check_threshold_value(self, attribute, value): "Threshold for next lesson cannot be greater negative when the measure is progress." ) + @staticmethod + def need_increment( + increment_condition: "CompletionCriteriaSettings", + progress: float, + reward_buffer: List[float], + smoothing: float, + ) -> Tuple[bool, float]: + # Is the min number of episodes reached + if len(reward_buffer) < increment_condition.min_lesson_length: + return False, smoothing + if ( + increment_condition.measure + == CompletionCriteriaSettings.MeasureType.PROGRESS + ): + if progress > increment_condition.threshold: + return True, smoothing + if increment_condition.measure == CompletionCriteriaSettings.MeasureType.REWARD: + if len(reward_buffer) < 1: + return False, smoothing + measure = np.mean(reward_buffer) + if math.isnan(measure): + return False, smoothing + if increment_condition.signal_smoothing: + measure = 0.25 * smoothing + 0.75 * measure + smoothing = measure + if measure > increment_condition.threshold: + return True, smoothing + return False, smoothing + @attr.s(auto_attribs=True) class Lesson: @@ -343,6 +374,31 @@ class Lesson: sampler: ParameterRandomizationSettings name: str + @staticmethod + def from_dict(d: Mapping) -> "Lesson": + # a lesson_dict contains a single lesson with the name of the lesson as key + completion_criteria = None + maybe_sampler = None + lesson_name = list(d.keys())[0] + lesson_config = list(d.values())[0] + if "completion_criteria" in lesson_config: + completion_criteria = strict_to_cls( + lesson_config["completion_criteria"], CompletionCriteriaSettings + ) + if "value" in lesson_config: + maybe_sampler = EnvironmentParameterSettings._sampler_from_config( + lesson_config["value"] + ) + if "value" not in lesson_config or maybe_sampler is None: + raise TrainerConfigError( + f"The parameter in lesson {lesson_name} does not contain a valid value." + ) + return Lesson( + completion_criteria=completion_criteria, + sampler=maybe_sampler, + name=lesson_name, + ) + @attr.s(auto_attribs=True) class EnvironmentParameterSettings: @@ -436,31 +492,8 @@ def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: # This is the curriculum case lessons: List[Lesson] = [] for lesson_dict in environment_parameter_config["curriculum"]: - # a lesson_dict contains a single lesson with the name of the lesson as key - completion_criteria = None - maybe_sampler = None - lesson_name = list(lesson_dict.keys())[0] - lesson_config = list(lesson_dict.values())[0] - if "completion_criteria" in lesson_config: - completion_criteria = strict_to_cls( - lesson_config["completion_criteria"], - CompletionCriteriaSettings, - ) - if "value" in lesson_config: - maybe_sampler = EnvironmentParameterSettings._sampler_from_config( - lesson_config["value"] - ) - if "value" not in lesson_config or maybe_sampler is None: - raise TrainerConfigError( - f"Parameter {environment_parameter} in lesson {lesson_name} does not contain a valid value." 
- ) - lessons.append( - Lesson( - completion_criteria=completion_criteria, - sampler=maybe_sampler, - name=lesson_name, - ) - ) + lesson = Lesson.from_dict(lesson_dict) + lessons.append(lesson) EnvironmentParameterSettings._check_lesson_chain( lessons, environment_parameter ) From e25e7f15b719f44474f6a706197ad97088228bb3 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 25 Jun 2020 16:05:08 -0700 Subject: [PATCH 11/26] Making the code nicer with cattr --- .../trainers/environment_parameter_manager.py | 18 +- ml-agents/mlagents/trainers/settings.py | 163 ++++++++---------- .../trainers/tests/test_env_param_manager.py | 38 ++-- .../mlagents/trainers/tests/test_settings.py | 10 +- 4 files changed, 105 insertions(+), 124 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index c54191fccf..85f8b3a2f9 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -37,15 +37,15 @@ def __init__( def _set_sampler_seeds(self, seed): offset = 0 for settings in self._dict_settings.values(): - for lesson in settings.lessons: - if lesson.sampler.seed == -1: - lesson.sampler.seed = seed + offset + for lesson in settings.curriculum: + if lesson.value.seed == -1: + lesson.value.seed = seed + offset offset += 1 def get_minimum_reward_buffer_size(self, behavior_name: str) -> int: result = 1 for settings in self._dict_settings.values(): - for lesson in settings.lessons: + for lesson in settings.curriculum: if lesson.completion_criteria is not None: if lesson.completion_criteria.behavior == behavior_name: result = max( @@ -59,8 +59,8 @@ def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]: lesson_num = GlobalTrainingStatus.get_parameter_state( param_name, StatusType.LESSON_NUM ) - lesson = settings.lessons[lesson_num] - samplers[param_name] = lesson.sampler + lesson = settings.curriculum[lesson_num] + samplers[param_name] = lesson.value return samplers def get_current_lesson_number(self) -> Dict[str, int]: @@ -83,10 +83,10 @@ def update_lessons( lesson_num = GlobalTrainingStatus.get_parameter_state( param_name, StatusType.LESSON_NUM ) - lesson = settings.lessons[lesson_num] + lesson = settings.curriculum[lesson_num] if ( lesson.completion_criteria is not None - and len(settings.lessons) > lesson_num + and len(settings.curriculum) > lesson_num ): behavior_to_consider = lesson.completion_criteria.behavior if behavior_to_consider in trainer_steps: @@ -102,7 +102,7 @@ def update_lessons( GlobalTrainingStatus.set_parameter_state( param_name, StatusType.LESSON_NUM, lesson_num + 1 ) - new_lesson_name = settings.lessons[lesson_num + 1].name + new_lesson_name = settings.curriculum[lesson_num + 1].name logger.info( f"Parameter '{param_name}' has changed. 
Now in lesson '{new_lesson_name}'" ) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 7b300327c0..2ba90268ef 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -1,6 +1,6 @@ import attr import cattr -from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple +from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union from enum import Enum import collections import argparse @@ -178,12 +178,14 @@ class ParameterRandomizationType(Enum): UNIFORM: str = "uniform" GAUSSIAN: str = "gaussian" MULTIRANGEUNIFORM: str = "multirangeuniform" + CONSTANT: str = "constant" def to_settings(self) -> type: _mapping = { ParameterRandomizationType.UNIFORM: UniformSettings, ParameterRandomizationType.GAUSSIAN: GaussianSettings, ParameterRandomizationType.MULTIRANGEUNIFORM: MultiRangeUniformSettings, + ParameterRandomizationType.CONSTANT: ConstantSettings # Constant type is handled if a float is provided instead of a config } return _mapping[self] @@ -193,6 +195,33 @@ def to_settings(self) -> type: class ParameterRandomizationSettings(abc.ABC): seed: int = parser.get_default("seed") + @staticmethod + def structure( + d: Union[Mapping, float], t: type + ) -> "ParameterRandomizationSettings": + """ + Helper method to a ParameterRandomizationSettings class. Meant to be registered with + cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle + the special Enum selection of ParameterRandomizationSettings classes. + """ + if isinstance(d, (float, int)): + return ConstantSettings(value=d) + if not isinstance(d, Mapping): + raise TrainerConfigError( + f"Unsupported parameter randomization configuration {d}." + ) + if "sampler_type" not in d: + raise TrainerConfigError( + "Sampler configuration does not contain sampler_type." + ) + if "sampler_parameters" not in d: + raise TrainerConfigError( + "Sampler configuration does not contain sampler_parameters." + ) + enum_key = ParameterRandomizationType(d["sampler_type"]) + t = enum_key.to_settings() + return strict_to_cls(d["sampler_parameters"], t) + @abc.abstractmethod def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: """ @@ -338,6 +367,10 @@ def need_increment( reward_buffer: List[float], smoothing: float, ) -> Tuple[bool, float]: + """ + Given measures, this method returns a boolean indicating if the lesson + needs to change now, and a float corresponding to the new smoothed value. 
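+        The smoothed value is only updated when the measure is reward and
+        signal_smoothing is enabled; otherwise the smoothing argument is
+        returned unchanged.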
+ """ # Is the min number of episodes reached if len(reward_buffer) < increment_condition.min_lesson_length: return False, smoothing @@ -371,33 +404,27 @@ class Lesson: """ completion_criteria: Optional[CompletionCriteriaSettings] - sampler: ParameterRandomizationSettings + value: ParameterRandomizationSettings name: str @staticmethod - def from_dict(d: Mapping) -> "Lesson": - # a lesson_dict contains a single lesson with the name of the lesson as key - completion_criteria = None - maybe_sampler = None - lesson_name = list(d.keys())[0] - lesson_config = list(d.values())[0] - if "completion_criteria" in lesson_config: - completion_criteria = strict_to_cls( - lesson_config["completion_criteria"], CompletionCriteriaSettings - ) - if "value" in lesson_config: - maybe_sampler = EnvironmentParameterSettings._sampler_from_config( - lesson_config["value"] - ) - if "value" not in lesson_config or maybe_sampler is None: - raise TrainerConfigError( - f"The parameter in lesson {lesson_name} does not contain a valid value." + def structure(d: Mapping, t: type) -> "Lesson": + if "value" not in d: + key = list(d.keys())[0] + if "value" not in d[key]: + raise TrainerConfigError( + f"The lesson {key} does not have a value field" + ) + return strict_to_cls( + { + "completion_criteria": d[key].get("completion_criteria", None), + "value": d[key]["value"], + "name": key, + }, + Lesson, ) - return Lesson( - completion_criteria=completion_criteria, - sampler=maybe_sampler, - name=lesson_name, - ) + else: + return strict_to_cls(d, Lesson) @attr.s(auto_attribs=True) @@ -407,7 +434,7 @@ class EnvironmentParameterSettings: parameter. """ - lessons: List[Lesson] + curriculum: List[Lesson] @staticmethod def _check_lesson_chain(lessons, parameter_name): @@ -418,31 +445,6 @@ def _check_lesson_chain(lessons, parameter_name): f"A non-terminal lesson does not have a completion_criteria for {parameter_name}." ) - @staticmethod - def _sampler_from_config( - environment_parameter_config: Mapping - ) -> Optional[ParameterRandomizationSettings]: - """ - Returns a ParameterRandomizationSettings when the environment_parameter_config - argument corresponds to a sampler and None otherwise. 
- """ - if environment_parameter_config is None: - return None - if isinstance(environment_parameter_config, (float, int)): - sampler = ConstantSettings(value=float(environment_parameter_config)) - return sampler - elif "sampler_type" in environment_parameter_config: - # This is the non-constant sampler case - enum_key = ParameterRandomizationType( - environment_parameter_config["sampler_type"] - ) - t = enum_key.to_settings() - sampler = strict_to_cls( - environment_parameter_config["sampler_parameters"], t - ) - return sampler - return None - @staticmethod def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: """ @@ -456,54 +458,29 @@ def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: ) d_final: Dict[str, EnvironmentParameterSettings] = {} for environment_parameter, environment_parameter_config in d.items(): - maybe_sampler = EnvironmentParameterSettings._sampler_from_config( - environment_parameter_config - ) - if isinstance(environment_parameter_config, dict): - if ( - ( - maybe_sampler is not None - and "curriculum" in environment_parameter_config - ) - or ( - maybe_sampler is not None - and "value" in environment_parameter_config - ) - or ( - "value" in environment_parameter_config - and "curriculum" in environment_parameter_config - ) - ): - raise TrainerConfigError( - f"Parameter {environment_parameter} can either be curriculum, " - "a constant value or a sampler. Not a combination of these." - ) - if maybe_sampler is not None: + if ( + isinstance(environment_parameter_config, Mapping) + and "curriculum" in environment_parameter_config + ): + d_final[environment_parameter] = strict_to_cls( + environment_parameter_config, EnvironmentParameterSettings + ) + EnvironmentParameterSettings._check_lesson_chain( + d_final[environment_parameter].curriculum, environment_parameter + ) + else: + sampler = ParameterRandomizationSettings.structure( + environment_parameter_config, ParameterRandomizationSettings + ) d_final[environment_parameter] = EnvironmentParameterSettings( - lessons=[ + curriculum=[ Lesson( completion_criteria=None, - sampler=maybe_sampler, + value=sampler, name=environment_parameter, ) ] ) - elif "curriculum" in environment_parameter_config: - # This is the curriculum case - lessons: List[Lesson] = [] - for lesson_dict in environment_parameter_config["curriculum"]: - lesson = Lesson.from_dict(lesson_dict) - lessons.append(lesson) - EnvironmentParameterSettings._check_lesson_chain( - lessons, environment_parameter - ) - d_final[environment_parameter] = EnvironmentParameterSettings( - lessons=lessons - ) - else: - raise TrainerConfigError( - f"The parameter {environment_parameter} does not contain a valid value." 
- ) return d_final @@ -661,6 +638,10 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook( Dict[str, EnvironmentParameterSettings], EnvironmentParameterSettings.structure ) + cattr.register_structure_hook(Lesson, Lesson.structure) + cattr.register_structure_hook( + ParameterRandomizationSettings, ParameterRandomizationSettings.structure + ) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index b4600dca5c..0bb0e4e47a 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -27,12 +27,12 @@ def test_sampler_conversion(): run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml)) assert run_options.environment_parameters is not None assert "param_1" in run_options.environment_parameters - lessons = run_options.environment_parameters["param_1"].lessons + lessons = run_options.environment_parameters["param_1"].curriculum assert len(lessons) == 1 assert lessons[0].completion_criteria is None - assert isinstance(lessons[0].sampler, UniformSettings) - assert lessons[0].sampler.min_value == 0.5 - assert lessons[0].sampler.max_value == 10 + assert isinstance(lessons[0].value, UniformSettings) + assert lessons[0].value.min_value == 0.5 + assert lessons[0].value.max_value == 10 test_sampler_and_constant_config_yaml = """ @@ -52,15 +52,15 @@ def test_sampler_and_constant_conversion(): ) assert "param_1" in run_options.environment_parameters assert "param_2" in run_options.environment_parameters - lessons_1 = run_options.environment_parameters["param_1"].lessons - lessons_2 = run_options.environment_parameters["param_2"].lessons + lessons_1 = run_options.environment_parameters["param_1"].curriculum + lessons_2 = run_options.environment_parameters["param_2"].curriculum # gaussian - assert isinstance(lessons_1[0].sampler, GaussianSettings) - assert lessons_1[0].sampler.mean == 4 - assert lessons_1[0].sampler.st_dev == 5 + assert isinstance(lessons_1[0].value, GaussianSettings) + assert lessons_1[0].value.mean == 4 + assert lessons_1[0].value.st_dev == 5 # constant - assert isinstance(lessons_2[0].sampler, ConstantSettings) - assert lessons_2[0].sampler.value == 20 + assert isinstance(lessons_2[0].value, ConstantSettings) + assert lessons_2[0].value.value == 20 test_curriculum_config_yaml = """ @@ -95,7 +95,7 @@ def test_sampler_and_constant_conversion(): def test_curriculum_conversion(): run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml)) assert "param_1" in run_options.environment_parameters - lessons = run_options.environment_parameters["param_1"].lessons + lessons = run_options.environment_parameters["param_1"].curriculum assert len(lessons) == 3 # First lesson lesson = lessons[0] @@ -108,8 +108,8 @@ def test_curriculum_conversion(): assert lesson.completion_criteria.threshold == 30.0 assert lesson.completion_criteria.min_lesson_length == 100 assert lesson.completion_criteria.require_reset - assert isinstance(lesson.sampler, ConstantSettings) - assert lesson.sampler.value == 1 + assert isinstance(lesson.value, ConstantSettings) + assert lesson.value.value == 1 # Second lesson lesson = lessons[1] assert lesson.completion_criteria is not None @@ -121,14 +121,14 @@ def test_curriculum_conversion(): 
assert lesson.completion_criteria.threshold == 60.0 assert lesson.completion_criteria.min_lesson_length == 100 assert not lesson.completion_criteria.require_reset - assert isinstance(lesson.sampler, ConstantSettings) - assert lesson.sampler.value == 2 + assert isinstance(lesson.value, ConstantSettings) + assert lesson.value.value == 2 # Last lesson lesson = lessons[2] assert lesson.completion_criteria is None - assert isinstance(lesson.sampler, UniformSettings) - assert lesson.sampler.min_value == 1 - assert lesson.sampler.max_value == 3 + assert isinstance(lesson.value, UniformSettings) + assert lesson.value.min_value == 1 + assert lesson.value.max_value == 3 test_bad_curriculum_no_competion_criteria_config_yaml = """ diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 6cdcd9440a..f2832e8073 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -202,16 +202,16 @@ def test_env_parameter_structure(): env_param_settings = EnvironmentParameterSettings.structure( env_params_dict, Dict[str, EnvironmentParameterSettings] ) - assert isinstance(env_param_settings["mass"].lessons[0].sampler, UniformSettings) - assert isinstance(env_param_settings["scale"].lessons[0].sampler, GaussianSettings) + assert isinstance(env_param_settings["mass"].curriculum[0].value, UniformSettings) + assert isinstance(env_param_settings["scale"].curriculum[0].value, GaussianSettings) assert isinstance( - env_param_settings["length"].lessons[0].sampler, MultiRangeUniformSettings + env_param_settings["length"].curriculum[0].value, MultiRangeUniformSettings ) assert isinstance( - env_param_settings["wall_height"].lessons[0].sampler, ConstantSettings + env_param_settings["wall_height"].curriculum[0].value, ConstantSettings ) assert isinstance( - env_param_settings["wall_height"].lessons[1].sampler, ConstantSettings + env_param_settings["wall_height"].curriculum[1].value, ConstantSettings ) # Check invalid distribution type From b0cb4e7bd33b33f6c8cf672a953461c553a25a73 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 25 Jun 2020 17:25:14 -0700 Subject: [PATCH 12/26] Added and registered an unstructure hook for PrameterRandomization --- ml-agents/mlagents/trainers/settings.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 2ba90268ef..df48c005c5 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -212,16 +212,31 @@ def structure( ) if "sampler_type" not in d: raise TrainerConfigError( - "Sampler configuration does not contain sampler_type." + f"Sampler configuration does not contain sampler_type : {d}." ) if "sampler_parameters" not in d: raise TrainerConfigError( - "Sampler configuration does not contain sampler_parameters." + f"Sampler configuration does not contain sampler_parameters : {d}." 
) enum_key = ParameterRandomizationType(d["sampler_type"]) t = enum_key.to_settings() return strict_to_cls(d["sampler_parameters"], t) + @staticmethod + def unstructure(d: "ParameterRandomizationSettings") -> Mapping: + _reversed_mapping = { + UniformSettings: ParameterRandomizationType.UNIFORM, + GaussianSettings: ParameterRandomizationType.GAUSSIAN, + MultiRangeUniformSettings: ParameterRandomizationType.MULTIRANGEUNIFORM, + ConstantSettings: ParameterRandomizationType.CONSTANT, + } + sampler_type: Optional[ParameterRandomizationType] = None + for t, name in _reversed_mapping.items(): + if isinstance(d, t): + sampler_type = name + sampler_parameters = attr.asdict(d) + return {"sampler_type": sampler_type, "sampler_parameters": sampler_parameters} + @abc.abstractmethod def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: """ @@ -642,6 +657,9 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook( ParameterRandomizationSettings, ParameterRandomizationSettings.structure ) + cattr.register_unstructure_hook( + ParameterRandomizationSettings, ParameterRandomizationSettings.unstructure + ) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict From ae709655032b3bb2fde38fcf0e4592e7c5ff47f8 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 25 Jun 2020 17:41:23 -0700 Subject: [PATCH 13/26] Updating C# Walljump --- .../ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs b/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs index 6e670b298f..b251021b4c 100644 --- a/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs +++ b/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs @@ -350,9 +350,7 @@ void ConfigureAgent(int config) } else { - var min = m_ResetParams.GetWithDefault("big_wall_min_height", 8); - var max = m_ResetParams.GetWithDefault("big_wall_max_height", 8); - var height = min + Random.value * (max - min); + var height = m_ResetParams.GetWithDefault("big_wall_height", 8); localScale = new Vector3( localScale.x, height, From 6f66a7e7013f937d27f9537cfb88fec6a4bd6434 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 26 Jun 2020 10:29:04 -0700 Subject: [PATCH 14/26] Adding comments --- .../trainers/environment_parameter_manager.py | 44 +++++++++++++++++++ ml-agents/mlagents/trainers/settings.py | 12 +++++ 2 files changed, 56 insertions(+) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 85f8b3a2f9..2ba312a45e 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -19,6 +19,18 @@ def __init__( run_seed: int, restore: bool, ): + """ + EnvironmentParameterManager manages all the environment parameters of a training + session. It determines when parameters should change and gives access to the + current sampler of each parameter. + :param settings: A dictionary from environment parameter to + EnvironmentParameterSettings. + :param run_seed: When the seed is not provided for an environment parameter, + this seed will be used instead. 
+ :param restore: If true, the EnvironmentParameterManager will use the + GlobalTrainingStatus to try and reload the lesson status of each environment + parameter. + """ self._dict_settings = settings for parameter_name in self._dict_settings.keys(): initial_lesson = GlobalTrainingStatus.get_parameter_state( @@ -35,6 +47,10 @@ def __init__( self._set_sampler_seeds(run_seed) def _set_sampler_seeds(self, seed): + """ + Sets the seeds for the samplers (if no seed was already present). Note that + using the provided seed. + """ offset = 0 for settings in self._dict_settings.values(): for lesson in settings.curriculum: @@ -43,6 +59,12 @@ def _set_sampler_seeds(self, seed): offset += 1 def get_minimum_reward_buffer_size(self, behavior_name: str) -> int: + """ + Calculates the minimum size of the reward buffer a behavior must use. This + method uses the 'min_lesson_length' sampler_parameter to determine this value. + :param behavior_name: The name of the behavior the minimum reward buffer + size corresponds to. + """ result = 1 for settings in self._dict_settings.values(): for lesson in settings.curriculum: @@ -54,6 +76,11 @@ def get_minimum_reward_buffer_size(self, behavior_name: str) -> int: return result def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]: + """ + Creates a dictionary from environment parameter name to their corresponding + ParameterRandomizationSettings. If curriculum is used, the + ParameterRandomizationSettings corresponds to the sampler of the current lesson. + """ samplers: Dict[str, ParameterRandomizationSettings] = {} for param_name, settings in self._dict_settings.items(): lesson_num = GlobalTrainingStatus.get_parameter_state( @@ -64,6 +91,10 @@ def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]: return samplers def get_current_lesson_number(self) -> Dict[str, int]: + """ + Creates a dictionary from environment parameter to the current lesson number. + If not using curriculum, this number is always 0 for that environment parameter. + """ result: Dict[str, int] = {} for parameter_name in self._dict_settings.keys(): result[parameter_name] = GlobalTrainingStatus.get_parameter_state( @@ -77,6 +108,19 @@ def update_lessons( trainer_max_steps: Dict[str, int], trainer_reward_buffer: Dict[str, List[float]], ) -> Tuple[bool, bool]: + """ + Given progress metrics, calculates if at least one environment parameter is + in a new lesson and if at least one environment parameter requires the env + to reset. + :param trainer_steps: A dictionary from behavior_name to the number of training + steps this behavior's trainer has performed. + :param trainer_max_steps: A dictionary from behavior_name to the maximum number + of training steps this behavior's trainer has performed. + :param trainer_reward_buffer: A dictionary from behavior_name to the list of + the most recent episode returns for this behavior's trainer. + :returns: A tuple of two booleans : (True if any lesson has changed, True if + environment needs to reset) + """ must_reset = False updated = False for param_name, settings in self._dict_settings.items(): diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index df48c005c5..7f49e10fb3 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -224,6 +224,10 @@ def structure( @staticmethod def unstructure(d: "ParameterRandomizationSettings") -> Mapping: + """ + Helper method to a ParameterRandomizationSettings class. 
Meant to be registered with + cattr.register_unstructure_hook() and called with cattr.unstructure(). + """ _reversed_mapping = { UniformSettings: ParameterRandomizationType.UNIFORM, GaussianSettings: ParameterRandomizationType.GAUSSIAN, @@ -424,6 +428,10 @@ class Lesson: @staticmethod def structure(d: Mapping, t: type) -> "Lesson": + """ + Helper method to a Lesson class. Meant to be registered with + cattr.register_structure_hook() and called with cattr.structure(). + """ if "value" not in d: key = list(d.keys())[0] if "value" not in d[key]: @@ -453,6 +461,10 @@ class EnvironmentParameterSettings: @staticmethod def _check_lesson_chain(lessons, parameter_name): + """ + Ensures that when using curriculum, all non-terminal lessons have a valid + CompletionCriteria + """ num_lessons = len(lessons) for index, lesson in enumerate(lessons): if index < num_lessons - 1 and lesson.completion_criteria is None: From 4a3c4f02f70d38218dc1dc0dc2742f238759b268 Mon Sep 17 00:00:00 2001 From: Ervin T Date: Fri, 26 Jun 2020 10:40:41 -0700 Subject: [PATCH 15/26] Add test for settings export (#4164) * Add test for settings export * Update ml-agents/mlagents/trainers/tests/test_settings.py Co-authored-by: Vincent-Pierre BERGES Co-authored-by: Vincent-Pierre BERGES --- .../mlagents/trainers/tests/test_settings.py | 118 +++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index f2832e8073..01e9721ad9 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -1,7 +1,8 @@ import attr import pytest +import yaml -from typing import Dict +from typing import Dict, List, Optional from mlagents.trainers.settings import ( RunOptions, @@ -32,6 +33,32 @@ def check_if_different(testobj1: object, testobj2: object) -> None: check_if_different(val, attr.asdict(testobj2, recurse=False)[key]) +def check_dict_is_at_least( + testdict1: Dict, testdict2: Dict, exceptions: Optional[List[str]] = None +) -> None: + """ + Check if everything present in the 1st dict is the same in the second dict. + Excludes things that the second dict has but is not present in the heirarchy of the + 1st dict. Used to compare an underspecified config dict structure (e.g. as + would be provided by a user) with a complete one (e.g. as exported by RunOptions). + """ + for key, val in testdict1.items(): + if exceptions is not None and key in exceptions: + continue + assert key in testdict2 + if isinstance(val, dict): + check_dict_is_at_least(val, testdict2[key]) + elif isinstance(val, list): + assert isinstance(testdict2[key], list) + for _el0, _el1 in zip(val, testdict2[key]): + if isinstance(_el0, dict): + check_dict_is_at_least(_el0, _el1) + else: + assert val == testdict2[key] + else: # If not a dict, don't recurse into it + assert val == testdict2[key] + + def test_is_new_instance(): """ Verify that every instance of RunOptions() and its subclasses @@ -289,3 +316,92 @@ def test_env_parameter_structure(): EnvironmentParameterSettings.structure( invalid_curriculum_dict, Dict[str, EnvironmentParameterSettings] ) + + +def test_exportable_settings(): + """ + Test that structuring and unstructuring a RunOptions object results in the same + configuration representation. + """ + # Try to enable as many features as possible in this test YAML to hit all the + # edge cases. Set as much as possible as non-default values to ensure no flukes. 
+ # TODO: Add back in environment_parameters + test_yaml = """ + behaviors: + 3DBall: + trainer_type: sac + hyperparameters: + learning_rate: 0.0004 + learning_rate_schedule: constant + batch_size: 64 + buffer_size: 200000 + buffer_init_steps: 100 + tau: 0.006 + steps_per_update: 10.0 + save_replay_buffer: true + init_entcoef: 0.5 + reward_signal_steps_per_update: 10.0 + network_settings: + normalize: false + hidden_units: 256 + num_layers: 3 + vis_encode_type: nature_cnn + memory: + memory_size: 1288 + sequence_length: 12 + reward_signals: + extrinsic: + gamma: 0.999 + strength: 1.0 + curiosity: + gamma: 0.999 + strength: 1.0 + keep_checkpoints: 5 + max_steps: 500000 + time_horizon: 1000 + summary_freq: 12000 + checkpoint_interval: 1 + threaded: true + env_settings: + env_path: test_env_path + env_args: + - test_env_args1 + - test_env_args2 + base_port: 12345 + num_envs: 8 + seed: 12345 + engine_settings: + width: 12345 + height: 12345 + quality_level: 12345 + time_scale: 12345 + target_frame_rate: 12345 + capture_frame_rate: 12345 + no_graphics: true + checkpoint_settings: + run_id: test_run_id + initialize_from: test_directory + load_model: false + resume: true + force: true + train_model: false + inference: false + debug: true + """ + loaded_yaml = yaml.safe_load(test_yaml) + run_options = RunOptions.from_dict(yaml.safe_load(test_yaml)) + dict_export = run_options.as_dict() + check_dict_is_at_least(loaded_yaml, dict_export) + + # Re-import and verify has same elements + run_options2 = RunOptions.from_dict(dict_export) + second_export = run_options2.as_dict() + + check_dict_is_at_least( + dict_export, second_export, exceptions=["environment_parameters"] + ) + # Should be able to use equality instead of back-and-forth once environment_parameters + # is working + check_dict_is_at_least( + second_export, dict_export, exceptions=["environment_parameters"] + ) From a10a4e39ec7e1bf2512bc9ada8e37f1fe3c37734 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 26 Jun 2020 10:43:38 -0700 Subject: [PATCH 16/26] Including environment parameters for the test for settings export --- .../mlagents/trainers/tests/test_settings.py | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 01e9721ad9..7a62b2b1cd 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -386,22 +386,64 @@ def test_exportable_settings(): force: true train_model: false inference: false - debug: true + environment_parameters: + big_wall_height: + curriculum: + - Lesson0: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: + sampler_type: uniform + sampler_parameters: + min_value: 0.0 + max_value: 4.0 + - Lesson1: + completion_criteria: + measure: reward + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.2 + value: + sampler_type: gaussian + sampler_parameters: + mean: 4.0 + st_dev: 7.0 + - Lesson2: + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 20 + threshold: 0.3 + value: + sampler_type: multirangeuniform + sampler_parameters: + intervals: [[1.0, 2.0],[4.0, 5.0]] + - Lesson3: + value: 8.0 + small_wall_height: 42.0 + other_wall_height: + sampler_type: multirangeuniform + sampler_parameters: + intervals: [[1.0, 2.0],[4.0, 5.0]] """ 
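    # Note: as_dict() may also emit settings that were left at their defaults, so the
    # checks below use check_dict_is_at_least (a recursive "subset" comparison)
    # rather than strict equality; everything explicitly set in the YAML above must
    # survive the round trip unchanged.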
loaded_yaml = yaml.safe_load(test_yaml) run_options = RunOptions.from_dict(yaml.safe_load(test_yaml)) dict_export = run_options.as_dict() - check_dict_is_at_least(loaded_yaml, dict_export) + check_dict_is_at_least( + loaded_yaml, dict_export, exceptions=["environment_parameters"] + ) # Re-import and verify has same elements run_options2 = RunOptions.from_dict(dict_export) second_export = run_options2.as_dict() - check_dict_is_at_least( - dict_export, second_export, exceptions=["environment_parameters"] - ) + check_dict_is_at_least(dict_export, second_export) # Should be able to use equality instead of back-and-forth once environment_parameters # is working - check_dict_is_at_least( - second_export, dict_export, exceptions=["environment_parameters"] - ) + check_dict_is_at_least(second_export, dict_export) From 699496e39dbdc0bb3279f231fa22fb02fab2f6f8 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 26 Jun 2020 11:30:14 -0700 Subject: [PATCH 17/26] First documentation update --- docs/Training-ML-Agents.md | 214 ++++++++++++++++++++----------------- docs/Using-Docker.md | 2 +- 2 files changed, 114 insertions(+), 102 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index b61fc3b24b..43bf2c6e64 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -9,13 +9,12 @@ - [Loading an Existing Model](#loading-an-existing-model) - [Training Configurations](#training-configurations) - [Behavior Configurations](#behavior-configurations) - - [Curriculum Learning](#curriculum-learning) - - [Specifying Curricula](#specifying-curricula) - - [Training with a Curriculum](#training-with-a-curriculum) - - [Environment Parameter Randomization](#environment-parameter-randomization) - - [Included Sampler Types](#included-sampler-types) - - [Defining a New Sampler Type](#defining-a-new-sampler-type) - - [Training with Environment Parameter Randomization](#training-with-environment-parameter-randomization) + - [Environment Parameters](#environment-parameters) + - [Environment Parameter Randomization](#environment-parameter-randomization) + - [Supported Sampler Types](#supported-sampler-types) + - [Training with Environment Parameter Randomization](#training-with-environment-parameter-randomization) + - [Curriculum Learning](#curriculum-learning) + - [Training with a Curriculum](#training-with-a-curriculum) - [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances) For a broad overview of reinforcement learning, imitation learning and all the @@ -137,8 +136,8 @@ More specifically, this section offers a detailed guide on the command-line flags for `mlagents-learn` that control the training configurations: - ``: defines the training hyperparameters for each - Behavior in the scene, and the set-ups for Curriculum Learning and - Environment Parameter Randomization + Behavior in the scene, and the set-ups for the environment parameters + (Curriculum Learning and Environment Parameter Randomization) - `--num-envs`: number of concurrent Unity instances to use during training Reminder that a detailed description of all command-line options can be found by @@ -179,7 +178,8 @@ use during training, and the answers to the above questions will dictate its con The rest of this guide breaks down the different sub-sections of the trainer config file and explains the possible settings for each. -**NOTE:** The configuration file format has been changed from 0.17.0 and onwards. 
To convert +**NOTE:** The configuration file format has been changed between 0.17.0 and onwards and +between 0.18.0 and onwards. To convert an old set of configuration files (trainer config, curriculum, and sampler files) to the new format, a script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` in your console to see the script's usage. @@ -194,7 +194,7 @@ below is a sample file that includes all the possible settings if we're using a PPO trainer with all the possible training functionalities enabled (memory, behavioral cloning, curiosity, GAIL and self-play). You will notice that curriculum and environment parameter randomization settings are not part of the `behaviors` -configuration, but their settings live in different sections that we'll cover subsequently. +configuration, but in their own section called `environment_parameters`. ```yaml behaviors: @@ -337,11 +337,13 @@ each of these parameters mean and provide guidelines on how to set them. See description of all the configurations listed above, along with their defaults. Unless otherwise specified, omitting a configuration will revert it to its default. -### Curriculum Learning -To enable curriculum learning, you need to add a `curriculum ` sub-section to the trainer -configuration YAML file. Within this sub-section, add an entry for each behavior that defines -the curriculum for thatbehavior. Here is one example: +### Environment Parameters + +In order to control the `EnvironmentParameters` in the Unity simulation during training, +you need to add a section called `environment_parameters`. For example you can set the +value of an `EnvironmentParameter` called `my_environment_parameter` to `3.0` with +the following code : ```yml behaviors: @@ -349,93 +351,30 @@ behaviors: # < Same as above > # Add this section -curriculum: - BehaviorY: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - wall_height: [1.5, 2.0, 2.5, 4.0] -``` - -Each group of Agents under the same `Behavior Name` in an environment can have a -corresponding curriculum. These curricula are held in what we call a -"metacurriculum". A metacurriculum allows different groups of Agents to follow -different curricula within the same environment. - -#### Specifying Curricula - -In order to define the curricula, the first step is to decide which parameters -of the environment will vary. In the case of the Wall Jump environment, the -height of the wall is what varies. Rather than adjusting it by hand, we will -create a configuration which describes the structure of the curricula. Within it, we -can specify which points in the training process our wall height will change, -either based on the percentage of training steps which have taken place, or what -the average reward the agent has received in the recent past is. Below is an -example config for the curricula for the Wall Jump environment. 
- -```yaml -behaviors: - BigWallJump: - # < Trainer parameters for BigWallJump > - SmallWallJump: - # < Trainer parameters for SmallWallJump > - -curriculum: - BigWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - big_wall_min_height: [0.0, 4.0, 6.0, 8.0] - big_wall_max_height: [4.0, 7.0, 8.0, 8.0] - SmallWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - small_wall_height: [1.5, 2.0, 2.5, 4.0] +environment_parameters: + my_environment_parameter: 3.0 ``` -The curriculum for each Behavior has the following parameters: - -| **Setting** | **Description** | -| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `measure` | What to measure learning progress, and advancement in lessons by.

`reward` uses a measure received reward, while `progress` uses the ratio of steps/max_steps. | -| `thresholds` | Points in value of `measure` where lesson should be increased. | -| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative.

**Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. | -| `signal_smoothing` | Whether to weight the current progress measure by previous values. | -| `parameters` | Corresponds to environment parameters to control. Length of each array should be one greater than number of thresholds. | - -#### Training with a Curriculum - -Once we have specified our metacurriculum and curricula, we can launch -`mlagents-learn` to point to the config file containing -our curricula and PPO will train using Curriculum Learning. For example, to -train agents in the Wall Jump environment with curriculum learning, we can run: +Inside the Unity simulation, you can access your Environment Parameters by doing : -```sh -mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum +```csharp +Academy.Instance.EnvironmentParameters.GetWithDefault("my_environment_parameter", 0.0f); ``` -We can then keep track of the current lessons and progresses via TensorBoard. If you've terminated -the run, you can resume it using `--resume` and lesson progress will start off where it -ended. +#### Environment Parameter Randomization -### Environment Parameter Randomization +To enable environment parameter randomization, you need to edit the `environment_parameters` +section of your training configuration yaml file. Instead of providing a single float value +for your environment parameter, you can specify a sampler instead. Here is an example with +three environment parameters called `mass`, `length` and `scale`: -To enable parameter randomization, you need to add a `parameter-randomization` sub-section -to your trainer config YAML file. Here is one example: - -```yaml +```yml behaviors: - # < Same as above> - -parameter_randomization: + BehaviorY: + # < Same as above > +# Add this section +environment_parameters: mass: sampler_type: uniform sampler_parameters: @@ -454,16 +393,13 @@ parameter_randomization: st_dev: .3 ``` -Note that `mass`, `length` and `scale` are the names of the environment -parameters that will be sampled. These are used as keys by the `EnvironmentParameter` -class to sample new parameters via the function `GetWithDefault`. | **Setting** | **Description** | | :--------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sampler_type` | A string identifier for the type of sampler to use for this `Environment Parameter`. | | `sampler_parameters` | The parameters for a given `sampler_type`. Samplers of different types can have different `sampler_parameters` | -#### Supported Sampler Types +##### Supported Sampler Types Below is a list of the `sampler_type` values supported by the toolkit. @@ -487,12 +423,11 @@ Below is a list of the `sampler_type` values supported by the toolkit. The implementation of the samplers can be found in the [Samplers.cs file](../com.unity.ml-agents/Runtime/Sampler.cs). 
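For reference, the sketch below shows how the Python trainer turns entries of this
section into settings objects. It is illustrative only: the class and function names
are taken from `ml-agents/mlagents/trainers/settings.py` as modified in this change,
the parameter names and values are placeholders, and this is not a supported public API.

```python
from typing import Dict

from mlagents.trainers.settings import (
    ConstantSettings,
    EnvironmentParameterSettings,
    GaussianSettings,
)

# Same shape as the YAML section above: one full sampler config, one bare float.
env_params_dict = {
    "mass": {
        "sampler_type": "gaussian",
        "sampler_parameters": {"mean": 1.0, "st_dev": 0.3},
    },
    "gravity": 9.8,
}

settings = EnvironmentParameterSettings.structure(
    env_params_dict, Dict[str, EnvironmentParameterSettings]
)
# Each parameter becomes a one-lesson curriculum whose value is the sampler;
# the bare float is wrapped in a constant (ConstantSettings) sampler.
assert isinstance(settings["mass"].curriculum[0].value, GaussianSettings)
assert isinstance(settings["gravity"].curriculum[0].value, ConstantSettings)
```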
-#### Training with Environment Parameter Randomization +##### Training with Environment Parameter Randomization After the sampler configuration is defined, we proceed by launching `mlagents-learn` -and specify trainer configuration with `parameter-randomization` defined. For example, -if we wanted to train the 3D ball agent with parameter randomization using -`Environment Parameters` with sampling setup, we would run +and specify trainer configuration with parameter randomization enabled. For example, +if we wanted to train the 3D ball agent with parameter randomization, we would run ```sh mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize @@ -500,6 +435,83 @@ mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize We can observe progress and metrics via Tensorboard. +#### Curriculum + +To enable curriculum learning, you need to add a `curriculum` sub-section to your environment +parameter. Here is one example with the environment parameter `my_environment_parameter` : + +```yml +behaviors: + BehaviorY: + # < Same as above > + +# Add this section +environment_parameters: + my_environment_parameter: + curriculum: + - MyFirstLesson: # The '-' is important as this is a list + completion_criteria: + measure: progress + behavior: my_behavior + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.2 + value: 0.0 + - MySecondLesson: + completion_criteria: + measure: progress + behavior: my_behavior + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.6 + require_reset: true + value: + sampler_type: uniform + sampler_parameters: + min_value: 4.0 + max_value: 7.0 + - MyLastLesson: + value: 8.0 +``` + +Note that this curriculum __only__ applies to `my_environment_parameter`. The `curriculum` section +contains a list of `Lessons`. In the example, the lessons are named `MyFirstLesson`, `MySecondLesson` +and `MyLastLesson`. +Each `Lesson` has two types of fields : + + - `completion_criteria` which determines what needs to happen in the simulation before the lesson + can be considered complete. When that condition is met, the curriculum moves on to the next + `Lesson`. Note that you do not need to specify a `completion_criteria` for the last `Lesson` + - `value` which is the value the environment parameter will take during the lesson. Note that this + can be a float or a sampler. + + There are the different settings of the `completion_criteria` : + + +| **Setting** | **Description** | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `measure` | What to measure learning progress, and advancement in lessons by.

`reward` uses a measure of received reward, while `progress` uses the ratio of steps/max_steps. |
| `behavior` | Specifies which behavior is being tracked. There can be multiple behaviors with different names, each at different points of training. This setting allows the curriculum to track only one of them. |
| `threshold` | Determines at what point in value of `measure` the lesson should be increased. |
| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative.

**Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. | +| `signal_smoothing` | Whether to weight the current progress measure by previous values. | +| `require_reset` | Whether changing lesson requires the environment to reset (default: false) | +##### Training with a Curriculum + +Once we have specified our metacurriculum and curricula, we can launch +`mlagents-learn` to point to the config file containing +our curricula and PPO will train using Curriculum Learning. For example, to +train agents in the Wall Jump environment with curriculum learning, we can run: + +```sh +mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum +``` + +We can then keep track of the current lessons and progresses via TensorBoard. If you've terminated +the run, you can resume it using `--resume` and lesson progress will start off where it +ended. + + ### Training Using Concurrent Unity Instances In order to run concurrent Unity instances during training, set the number of diff --git a/docs/Using-Docker.md b/docs/Using-Docker.md index f7ea5b7a7c..f2922964f8 100644 --- a/docs/Using-Docker.md +++ b/docs/Using-Docker.md @@ -36,7 +36,7 @@ agents using camera-based visual observations might be slower. - Since Docker runs a container in an environment that is isolated from the host machine, a mounted directory in your host machine is used to share data, e.g. - the trainer configuration file, Unity executable, curriculum files and + the trainer configuration file, Unity executable and TensorFlow graph. For convenience, we created an empty `unity-volume` directory at the root of the repository for this purpose, but feel free to use any other directory. 
The remainder of this guide assumes that the From 3755bc556a0dbc1f11275c38069f0ef4b8bd79be Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 26 Jun 2020 11:32:25 -0700 Subject: [PATCH 18/26] Fixing a link --- docs/Training-ML-Agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 43bf2c6e64..02447f9bb7 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -13,7 +13,7 @@ - [Environment Parameter Randomization](#environment-parameter-randomization) - [Supported Sampler Types](#supported-sampler-types) - [Training with Environment Parameter Randomization](#training-with-environment-parameter-randomization) - - [Curriculum Learning](#curriculum-learning) + - [Curriculum Learning](#curriculum) - [Training with a Curriculum](#training-with-a-curriculum) - [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances) From 84786cecd8a06324c33afaa2aef95d5cd9a5a34b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 26 Jun 2020 11:42:07 -0700 Subject: [PATCH 19/26] Updating changelog and migrating --- com.unity.ml-agents/CHANGELOG.md | 4 ++++ docs/Migrating.md | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index fa7a04096c..8c1385c514 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to #### ml-agents / ml-agents-envs / gym-unity (Python) - The Parameter Randomization feature has been refactored to enable sampling of new parameters per episode to improve robustness. The `resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4065) +- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler +in the lesson of a Curriculum. Curriculum has been refactored and is now specified at the level of the parameter, not the +behavior. More information +[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160) ### Minor Changes #### com.unity.ml-agents (C#) diff --git a/docs/Migrating.md b/docs/Migrating.md index f27baef5f8..5b2b2d7aed 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -14,7 +14,21 @@ double-check that the versions are in the same. The versions can be found in # Migrating -## Migrating from Release 1 to latest +## Migrating from Release 3 to latest + +### Important changes +- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler +in the lesson of a Curriculum. Curriculum has been refactored and is now specified at the level of the parameter, not the +behavior. More information +[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160) + +### Steps to Migrate +- The configuration format for curriculum and parameter randomization has changed. To upgrade your configuration files, +an upgrade script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` to see the script usage. Note that you will have had to upgrade to/install the current version of ML-Agents before running the script. 
For manual update : + - If your config file used a `parameter_randomization` section, rename that section to `environment_parameters` + - If your config file used a `curriculum` section, you will need to rewrite your curriculum with this [format](Training-ML-Agents.md#curriculum). + +## Migrating from Release 1 to Release 3 ### Important changes - Training artifacts (trained models, summaries) are now found under `results/` From 0976352a188ff338f82941cb425e0e8624a1ba8c Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 29 Jun 2020 11:34:01 -0700 Subject: [PATCH 20/26] adding some more tests for the conversion script --- .../trainers/tests/test_config_conversion.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/test_config_conversion.py b/ml-agents/mlagents/trainers/tests/test_config_conversion.py index 721fb33361..644dbdced3 100644 --- a/ml-agents/mlagents/trainers/tests/test_config_conversion.py +++ b/ml-agents/mlagents/trainers/tests/test_config_conversion.py @@ -128,7 +128,7 @@ BigWallJump: measure: progress thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 + min_lesson_length: 200 signal_smoothing: true parameters: big_wall_min_height: [0.0, 4.0, 6.0, 8.0] @@ -192,7 +192,24 @@ def test_convert(): config = convert(old_behaviors, old_curriculum, old_sampler) assert BRAIN_NAME in config["behaviors"] assert "big_wall_min_height" in config["environment_parameters"] + + curriculum = config["environment_parameters"]["big_wall_min_height"]["curriculum"] + assert len(curriculum) == 4 + for i, expected_value in enumerate([0.0, 4.0, 6.0, 8.0]): + assert curriculum[i][f"Lesson{i}"]["value"] == expected_value + for i, threshold in enumerate([0.1, 0.3, 0.5]): + criteria = curriculum[i][f"Lesson{i}"]["completion_criteria"] + assert criteria["threshold"] == threshold + assert criteria["behavior"] == "BigWallJump" + assert criteria["signal_smoothing"] + assert criteria["min_lesson_length"] == 200 + assert criteria["measure"] == "progress" + assert "gravity" in config["environment_parameters"] + gravity = config["environment_parameters"]["gravity"] + assert gravity["sampler_type"] == "uniform" + assert gravity["sampler_parameters"]["min_value"] == 7 + assert gravity["sampler_parameters"]["max_value"] == 12 def test_remove_nones(): From 09851eea75ad637b655146a1e02ad9736d1fdafb Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 29 Jun 2020 13:49:02 -0700 Subject: [PATCH 21/26] fixing bugs and using samplers in the walljump curriculum --- config/ppo/WallJump_curriculum.yaml | 48 +++++++++---------------- ml-agents/mlagents/trainers/settings.py | 4 +-- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/config/ppo/WallJump_curriculum.yaml b/config/ppo/WallJump_curriculum.yaml index db0b2bdc35..50c7637386 100644 --- a/config/ppo/WallJump_curriculum.yaml +++ b/config/ppo/WallJump_curriculum.yaml @@ -50,7 +50,7 @@ behaviors: summary_freq: 20000 threaded: true environment_parameters: - big_wall_min_height: + big_wall_height: curriculum: - Lesson0: completion_criteria: @@ -59,7 +59,11 @@ environment_parameters: signal_smoothing: true min_lesson_length: 100 threshold: 0.1 - value: 0.0 + value: + sampler_type: uniform + sampler_parameters: + min_value: 0.0 + max_value: 4.0 - Lesson1: completion_criteria: measure: progress @@ -67,7 +71,11 @@ environment_parameters: signal_smoothing: true min_lesson_length: 100 threshold: 0.3 - value: 4.0 + value: + sampler_type: uniform + sampler_parameters: + min_value: 4.0 + 
max_value: 7.0 - Lesson2: completion_criteria: measure: progress @@ -75,35 +83,11 @@ environment_parameters: signal_smoothing: true min_lesson_length: 100 threshold: 0.5 - value: 6.0 - - Lesson3: - value: 8.0 - big_wall_max_height: - curriculum: - - Lesson0: - completion_criteria: - measure: progress - behavior: BigWallJump - signal_smoothing: true - min_lesson_length: 100 - threshold: 0.1 - value: 4.0 - - Lesson1: - completion_criteria: - measure: progress - behavior: BigWallJump - signal_smoothing: true - min_lesson_length: 100 - threshold: 0.3 - value: 7.0 - - Lesson2: - completion_criteria: - measure: progress - behavior: BigWallJump - signal_smoothing: true - min_lesson_length: 100 - threshold: 0.5 - value: 8.0 + value: + sampler_type: uniform + sampler_parameters: + min_value: 6.0 + max_value: 8.0 - Lesson3: value: 8.0 small_wall_height: diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 7f49e10fb3..afe7bd1ef2 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -234,10 +234,10 @@ def unstructure(d: "ParameterRandomizationSettings") -> Mapping: MultiRangeUniformSettings: ParameterRandomizationType.MULTIRANGEUNIFORM, ConstantSettings: ParameterRandomizationType.CONSTANT, } - sampler_type: Optional[ParameterRandomizationType] = None + sampler_type: Optional[str] = None for t, name in _reversed_mapping.items(): if isinstance(d, t): - sampler_type = name + sampler_type = name.value sampler_parameters = attr.asdict(d) return {"sampler_type": sampler_type, "sampler_parameters": sampler_parameters} From a5f9edca7ccc64ed0866a722278a4bcd791fa8b6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 29 Jun 2020 16:14:35 -0700 Subject: [PATCH 22/26] Changing the format of the curriculum file as per discussion --- config/ppo/WallJump_curriculum.yaml | 16 +++---- docs/Training-ML-Agents.md | 10 +++-- ml-agents/mlagents/trainers/settings.py | 27 +----------- .../trainers/tests/test_env_param_manager.py | 18 ++++---- .../mlagents/trainers/tests/test_settings.py | 44 +++++++++---------- 5 files changed, 46 insertions(+), 69 deletions(-) diff --git a/config/ppo/WallJump_curriculum.yaml b/config/ppo/WallJump_curriculum.yaml index 50c7637386..9ccc2d33b6 100644 --- a/config/ppo/WallJump_curriculum.yaml +++ b/config/ppo/WallJump_curriculum.yaml @@ -52,7 +52,7 @@ behaviors: environment_parameters: big_wall_height: curriculum: - - Lesson0: + - name: Lesson0 # The '-' is important as this is a list completion_criteria: measure: progress behavior: BigWallJump @@ -64,7 +64,7 @@ environment_parameters: sampler_parameters: min_value: 0.0 max_value: 4.0 - - Lesson1: + - name: Lesson1 # This is the start of the second lesson completion_criteria: measure: progress behavior: BigWallJump @@ -76,7 +76,7 @@ environment_parameters: sampler_parameters: min_value: 4.0 max_value: 7.0 - - Lesson2: + - name: Lesson2 completion_criteria: measure: progress behavior: BigWallJump @@ -88,11 +88,11 @@ environment_parameters: sampler_parameters: min_value: 6.0 max_value: 8.0 - - Lesson3: + - name: Lesson3 value: 8.0 small_wall_height: curriculum: - - Lesson0: + - name: Lesson0 completion_criteria: measure: progress behavior: SmallWallJump @@ -100,7 +100,7 @@ environment_parameters: min_lesson_length: 100 threshold: 0.1 value: 1.5 - - Lesson1: + - name: Lesson1 completion_criteria: measure: progress behavior: SmallWallJump @@ -108,7 +108,7 @@ environment_parameters: min_lesson_length: 100 threshold: 0.3 value: 2.0 - - Lesson2: + - 
name: Lesson2 completion_criteria: measure: progress behavior: SmallWallJump @@ -116,5 +116,5 @@ environment_parameters: min_lesson_length: 100 threshold: 0.5 value: 2.5 - - Lesson3: + - name: Lesson3 value: 4.0 diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 02447f9bb7..7f44ae00de 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -449,7 +449,7 @@ behaviors: environment_parameters: my_environment_parameter: curriculum: - - MyFirstLesson: # The '-' is important as this is a list + - name: MyFirstLesson # The '-' is important as this is a list completion_criteria: measure: progress behavior: my_behavior @@ -457,7 +457,7 @@ environment_parameters: min_lesson_length: 100 threshold: 0.2 value: 0.0 - - MySecondLesson: + - name: MySecondLesson # This is the start of the second lesson completion_criteria: measure: progress behavior: my_behavior @@ -470,15 +470,17 @@ environment_parameters: sampler_parameters: min_value: 4.0 max_value: 7.0 - - MyLastLesson: + - name: MyLastLesson value: 8.0 ``` Note that this curriculum __only__ applies to `my_environment_parameter`. The `curriculum` section contains a list of `Lessons`. In the example, the lessons are named `MyFirstLesson`, `MySecondLesson` and `MyLastLesson`. -Each `Lesson` has two types of fields : +Each `Lesson` has 3 fields : + - `name` which is a user defined name for the lesson (The name of the lesson will be displayed in + the console when the lesson changes) - `completion_criteria` which determines what needs to happen in the simulation before the lesson can be considered complete. When that condition is met, the curriculum moves on to the next `Lesson`. Note that you do not need to specify a `completion_criteria` for the last `Lesson` diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index afe7bd1ef2..24dc48a86b 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -422,32 +422,9 @@ class Lesson: the last lesson in the curriculum. """ - completion_criteria: Optional[CompletionCriteriaSettings] value: ParameterRandomizationSettings name: str - - @staticmethod - def structure(d: Mapping, t: type) -> "Lesson": - """ - Helper method to a Lesson class. Meant to be registered with - cattr.register_structure_hook() and called with cattr.structure(). 
- """ - if "value" not in d: - key = list(d.keys())[0] - if "value" not in d[key]: - raise TrainerConfigError( - f"The lesson {key} does not have a value field" - ) - return strict_to_cls( - { - "completion_criteria": d[key].get("completion_criteria", None), - "value": d[key]["value"], - "name": key, - }, - Lesson, - ) - else: - return strict_to_cls(d, Lesson) + completion_criteria: Optional[CompletionCriteriaSettings] = attr.ib(default=None) @attr.s(auto_attribs=True) @@ -665,7 +642,7 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook( Dict[str, EnvironmentParameterSettings], EnvironmentParameterSettings.structure ) - cattr.register_structure_hook(Lesson, Lesson.structure) + cattr.register_structure_hook(Lesson, strict_to_cls) cattr.register_structure_hook( ParameterRandomizationSettings, ParameterRandomizationSettings.structure ) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index 0bb0e4e47a..b8fb92e15e 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -67,7 +67,7 @@ def test_sampler_and_constant_conversion(): environment_parameters: param_1: curriculum: - - FirstLesson: + - name: Lesson1 completion_criteria: measure: reward behavior: fake_behavior @@ -75,7 +75,7 @@ def test_sampler_and_constant_conversion(): min_lesson_length: 100 require_reset: true value: 1 - - SecondLesson: + - name: Lesson2 completion_criteria: measure: reward behavior: fake_behavior @@ -83,7 +83,7 @@ def test_sampler_and_constant_conversion(): min_lesson_length: 100 require_reset: false value: 2 - - LastLesson: + - name: Lesson3 value: sampler_type: uniform sampler_parameters: @@ -135,7 +135,7 @@ def test_curriculum_conversion(): environment_parameters: param_1: curriculum: - - FirstLesson: + - name: Lesson1 completion_criteria: measure: reward behavior: fake_behavior @@ -143,9 +143,9 @@ def test_curriculum_conversion(): min_lesson_length: 100 require_reset: true value: 1 - - SecondLesson: + - name: Lesson2 value: 2 - - LastLesson: + - name: Lesson3 value: sampler_type: uniform sampler_parameters: @@ -165,7 +165,7 @@ def test_curriculum_raises_no_completion_criteria_conversion(): environment_parameters: param_1: curriculum: - - FirstLesson: + - name: Lesson1 completion_criteria: measure: reward behavior: fake_behavior @@ -173,7 +173,7 @@ def test_curriculum_raises_no_completion_criteria_conversion(): min_lesson_length: 100 require_reset: true value: 1 - - SecondLesson: + - name: Lesson2 completion_criteria: measure: progress behavior: fake_behavior @@ -181,7 +181,7 @@ def test_curriculum_raises_no_completion_criteria_conversion(): min_lesson_length: 100 require_reset: false value: 2 - - LastLesson: + - name: Lesson3 value: sampler_type: uniform sampler_parameters: diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 7a62b2b1cd..9263f3efd1 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -213,16 +213,15 @@ def test_env_parameter_structure(): "wall_height": { "curriculum": [ { - "Lesson1": { - "completion_criteria": { - "measure": "reward", - "behavior": "fake_behavior", - "threshold": 10, - }, - "value": 1, - } + "name": "Lesson1", + "completion_criteria": { + "measure": "reward", + "behavior": "fake_behavior", + "threshold": 10, + }, + "value": 1, }, - {"Lesson2": {"value": 
4}}, + {"value": 4, "name": "Lesson2"}, ] }, } @@ -299,16 +298,15 @@ def test_env_parameter_structure(): "wall_height": { "curriculum": [ { - "Lesson1": { - "completion_criteria": { - "measure": "progress", - "behavior": "fake_behavior", - "threshold": 10, - }, # > 1 is too large - "value": 1, - }, - "Lesson2": {"value": 4}, - } + "name": "Lesson1", + "completion_criteria": { + "measure": "progress", + "behavior": "fake_behavior", + "threshold": 10, + }, # > 1 is too large + "value": 1, + }, + {"value": 4, "name": "Lesson2"}, ] } } @@ -389,7 +387,7 @@ def test_exportable_settings(): environment_parameters: big_wall_height: curriculum: - - Lesson0: + - name: Lesson0 completion_criteria: measure: progress behavior: BigWallJump @@ -401,7 +399,7 @@ def test_exportable_settings(): sampler_parameters: min_value: 0.0 max_value: 4.0 - - Lesson1: + - name: Lesson1 completion_criteria: measure: reward behavior: BigWallJump @@ -413,7 +411,7 @@ def test_exportable_settings(): sampler_parameters: mean: 4.0 st_dev: 7.0 - - Lesson2: + - name: Lesson2 completion_criteria: measure: progress behavior: BigWallJump @@ -424,7 +422,7 @@ def test_exportable_settings(): sampler_type: multirangeuniform sampler_parameters: intervals: [[1.0, 2.0],[4.0, 5.0]] - - Lesson3: + - name: Lesson3 value: 8.0 small_wall_height: 42.0 other_wall_height: From a6429c2ea97ec04c2b4f0a4587e9270b49b66a26 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 29 Jun 2020 17:33:27 -0700 Subject: [PATCH 23/26] Addressing comments --- docs/Training-ML-Agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 7f44ae00de..e7fc6c1df3 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -178,7 +178,7 @@ use during training, and the answers to the above questions will dictate its con The rest of this guide breaks down the different sub-sections of the trainer config file and explains the possible settings for each. -**NOTE:** The configuration file format has been changed between 0.17.0 and onwards and +**NOTE:** The configuration file format has been changed between 0.17.0 and 0.18.0 and between 0.18.0 and onwards. To convert an old set of configuration files (trainer config, curriculum, and sampler files) to the new format, a script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` in your From cf76973baf1d0c14aa3c60f553572a450baaea10 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Mon, 29 Jun 2020 17:33:59 -0700 Subject: [PATCH 24/26] Update ml-agents/mlagents/trainers/settings.py Co-authored-by: Ervin T. --- ml-agents/mlagents/trainers/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 24dc48a86b..f58d134ab4 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -376,7 +376,7 @@ def _check_threshold_value(self, attribute, value): ) if self.threshold < 0.0: raise TrainerConfigError( - "Threshold for next lesson cannot be greater negative when the measure is progress." + "Threshold for next lesson cannot be negative when the measure is progress." 
) @staticmethod From d89619b0fc76dad5c97ed74cd6da91b55c2e23ec Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Tue, 7 Jul 2020 10:38:29 -0700 Subject: [PATCH 25/26] Update docs/Migrating.md Co-authored-by: Chris Elion --- docs/Migrating.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Migrating.md b/docs/Migrating.md index 5b2b2d7aed..593d29f5a9 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -24,7 +24,7 @@ behavior. More information ### Steps to Migrate - The configuration format for curriculum and parameter randomization has changed. To upgrade your configuration files, -an upgrade script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` to see the script usage. Note that you will have had to upgrade to/install the current version of ML-Agents before running the script. For manual update : +an upgrade script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` to see the script usage. Note that you will have had to upgrade to/install the current version of ML-Agents before running the script. To update manually: - If your config file used a `parameter_randomization` section, rename that section to `environment_parameters` - If your config file used a `curriculum` section, you will need to rewrite your curriculum with this [format](Training-ML-Agents.md#curriculum). From 5a61d30c16cd51f6079ceaf6c87ad450285dd50a Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 12:06:23 -0700 Subject: [PATCH 26/26] addressing comments --- .../trainers/environment_parameter_manager.py | 14 +++--- ml-agents/mlagents/trainers/learn.py | 21 ++------ ml-agents/mlagents/trainers/settings.py | 21 +++----- .../mlagents/trainers/tests/test_learn.py | 33 ++++++++----- .../mlagents/trainers/tests/test_simple_rl.py | 3 ++ .../trainers/tests/test_trainer_controller.py | 3 +- .../trainers/tests/test_trainer_util.py | 3 ++ .../mlagents/trainers/trainer_controller.py | 48 ++++++++----------- ml-agents/mlagents/trainers/trainer_util.py | 14 +++--- 9 files changed, 74 insertions(+), 86 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 2ba312a45e..232dd0fb83 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -1,7 +1,6 @@ -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional from mlagents.trainers.settings import ( EnvironmentParameterSettings, - CompletionCriteriaSettings, ParameterRandomizationSettings, ) from collections import defaultdict @@ -15,9 +14,9 @@ class EnvironmentParameterManager: def __init__( self, - settings: Dict[str, EnvironmentParameterSettings], - run_seed: int, - restore: bool, + settings: Optional[Dict[str, EnvironmentParameterSettings]] = None, + run_seed: int = -1, + restore: bool = False, ): """ EnvironmentParameterManager manages all the environment parameters of a training @@ -31,6 +30,8 @@ def __init__( GlobalTrainingStatus to try and reload the lesson status of each environment parameter. 
""" + if settings is None: + settings = {} self._dict_settings = settings for parameter_name in self._dict_settings.keys(): initial_lesson = GlobalTrainingStatus.get_parameter_state( @@ -134,8 +135,7 @@ def update_lessons( ): behavior_to_consider = lesson.completion_criteria.behavior if behavior_to_consider in trainer_steps: - must_increment, new_smoothing = CompletionCriteriaSettings.need_increment( - lesson.completion_criteria, + must_increment, new_smoothing = lesson.completion_criteria.need_increment( float(trainer_steps[behavior_to_consider]) / float(trainer_max_steps[behavior_to_consider]), trainer_reward_buffer[behavior_to_consider], diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 2da04d1e9d..55525468cf 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -5,7 +5,7 @@ import numpy as np import json -from typing import Callable, Optional, List, Dict +from typing import Callable, Optional, List import mlagents.trainers import mlagents_envs @@ -22,7 +22,7 @@ ) from mlagents.trainers.cli_utils import parser from mlagents_envs.environment import UnityEnvironment -from mlagents.trainers.settings import RunOptions, EnvironmentParameterSettings +from mlagents.trainers.settings import RunOptions from mlagents.trainers.training_status import GlobalTrainingStatus from mlagents_envs.base_env import BaseEnv @@ -129,7 +129,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: env_manager = SubprocessEnvManager( env_factory, engine_config, env_settings.num_envs ) - maybe_parameter_manager = try_create_param_manager( + env_parameter_manager = EnvironmentParameterManager( options.environment_parameters, run_seed, restore=checkpoint_settings.resume ) @@ -139,8 +139,8 @@ def run_training(run_seed: int, options: RunOptions) -> None: not checkpoint_settings.inference, checkpoint_settings.resume, run_seed, + env_parameter_manager, maybe_init_path, - maybe_parameter_manager, False, ) # Create controller and begin training. @@ -148,7 +148,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: trainer_factory, write_path, checkpoint_settings.run_id, - maybe_parameter_manager, + env_parameter_manager, not checkpoint_settings.inference, run_seed, ) @@ -192,17 +192,6 @@ def write_timing_tree(output_dir: str) -> None: ) -def try_create_param_manager( - config: Optional[Dict[str, EnvironmentParameterSettings]], - run_seed: int, - restore: bool = False, -) -> Optional[EnvironmentParameterManager]: - if config is None: - return None - else: - return EnvironmentParameterManager(config, run_seed, restore) - - def create_environment_factory( env_path: Optional[str], no_graphics: bool, diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 60bb52d572..55ee9b0b63 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -379,36 +379,29 @@ def _check_threshold_value(self, attribute, value): "Threshold for next lesson cannot be negative when the measure is progress." ) - @staticmethod def need_increment( - increment_condition: "CompletionCriteriaSettings", - progress: float, - reward_buffer: List[float], - smoothing: float, + self, progress: float, reward_buffer: List[float], smoothing: float ) -> Tuple[bool, float]: """ Given measures, this method returns a boolean indicating if the lesson needs to change now, and a float corresponding to the new smoothed value. 
""" # Is the min number of episodes reached - if len(reward_buffer) < increment_condition.min_lesson_length: + if len(reward_buffer) < self.min_lesson_length: return False, smoothing - if ( - increment_condition.measure - == CompletionCriteriaSettings.MeasureType.PROGRESS - ): - if progress > increment_condition.threshold: + if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: + if progress > self.threshold: return True, smoothing - if increment_condition.measure == CompletionCriteriaSettings.MeasureType.REWARD: + if self.measure == CompletionCriteriaSettings.MeasureType.REWARD: if len(reward_buffer) < 1: return False, smoothing measure = np.mean(reward_buffer) if math.isnan(measure): return False, smoothing - if increment_condition.signal_smoothing: + if self.signal_smoothing: measure = 0.25 * smoothing + 0.75 * measure smoothing = measure - if measure > increment_condition.threshold: + if measure > self.threshold: return True, smoothing return False, smoothing diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 4bcc9fc460..0df04893eb 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -7,6 +7,7 @@ from mlagents.trainers.cli_utils import DetectDefault from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager def basic_options(extra_args=None): @@ -65,20 +66,26 @@ def test_run_training( mock_env.academy_name = "TestAcademyName" create_environment_factory.return_value = mock_env load_config.return_value = yaml.safe_load(MOCK_INITIALIZE_YAML) - + mock_param_manager = MagicMock(return_value="mock_param_manager") mock_init = MagicMock(return_value=None) - with patch.object(TrainerController, "__init__", mock_init): - with patch.object(TrainerController, "start_learning", MagicMock()): - options = basic_options() - learn.run_training(0, options) - mock_init.assert_called_once_with( - trainer_factory_mock.return_value, "results/ppo", "ppo", None, True, 0 - ) - handle_dir_mock.assert_called_once_with( - "results/ppo", False, False, "results/notuselessrun" - ) - write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") - write_run_options_mock.assert_called_once_with("results/ppo", options) + with patch.object(EnvironmentParameterManager, "__new__", mock_param_manager): + with patch.object(TrainerController, "__init__", mock_init): + with patch.object(TrainerController, "start_learning", MagicMock()): + options = basic_options() + learn.run_training(0, options) + mock_init.assert_called_once_with( + trainer_factory_mock.return_value, + "results/ppo", + "ppo", + "mock_param_manager", + True, + 0, + ) + handle_dir_mock.assert_called_once_with( + "results/ppo", False, False, "results/notuselessrun" + ) + write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") + write_run_options_mock.assert_called_once_with("results/ppo", options) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index ca8e0c852c..82b6a11c7c 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -26,6 +26,7 @@ TrainerType, RewardSignalType, ) +from mlagents.trainers.environment_parameter_manager 
import EnvironmentParameterManager from mlagents.trainers.models import EncoderType, ScheduleType from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, @@ -111,6 +112,8 @@ def _check_environment_trains( success_threshold=0.9, env_manager=None, ): + if env_parameter_manager is None: + env_parameter_manager = EnvironmentParameterManager() # Create controller and begin training. with tempfile.TemporaryDirectory() as dir: run_id = "id" diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index ac02f0323b..a31cfd4fbb 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -3,6 +3,7 @@ from mlagents.tf_utils import tf from mlagents.trainers.trainer_controller import TrainerController +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.ghost.controller import GhostController @@ -14,7 +15,7 @@ def basic_trainer_controller(): trainer_factory=trainer_factory_mock, output_path="test_model_path", run_id="test_run_id", - param_manager=None, + param_manager=EnvironmentParameterManager(), train=True, training_seed=99, ) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index ca10e1290e..2239f034b6 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -10,6 +10,7 @@ from mlagents.trainers.brain import BrainParameters from mlagents.trainers.settings import RunOptions from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager @pytest.fixture @@ -56,6 +57,7 @@ def mock_constructor( train_model=train_model, load_model=load_model, seed=seed, + param_manager=EnvironmentParameterManager(), ) trainers = {} for brain_name, brain_parameters in external_brains.items(): @@ -86,6 +88,7 @@ def test_handles_no_config_provided(BrainParametersMock): train_model=True, load_model=False, seed=42, + param_manager=EnvironmentParameterManager(), ) trainer_factory.generate(brain_parameters.brain_name) diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index fac61b044a..c4b612b8d8 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -4,7 +4,7 @@ import os import threading -from typing import Dict, Optional, Set, List +from typing import Dict, Set, List from collections import defaultdict import numpy as np @@ -36,7 +36,7 @@ def __init__( trainer_factory: TrainerFactory, output_path: str, run_id: str, - param_manager: Optional[EnvironmentParameterManager], + param_manager: EnvironmentParameterManager, train: bool, training_seed: int, ): @@ -110,9 +110,7 @@ def _reset_env(self, env: EnvManager) -> None: A Data structure corresponding to the initial reset state of the environment. 
""" - new_config = ( - self.param_manager.get_current_samplers() if self.param_manager else {} - ) + new_config = self.param_manager.get_current_samplers() env.reset(config=new_config) def _not_done_training(self) -> bool: @@ -217,18 +215,15 @@ def end_trainer_episodes(self) -> None: trainer.end_episode() def reset_env_if_ready(self, env: EnvManager) -> None: - if self.param_manager: - # Get the sizes of the reward buffers. - reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} - curr_step = {k: int(t.step) for (k, t) in self.trainers.items()} - max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} - # Attempt to increment the lessons of the brains who - # were ready. - updated, param_must_reset = self.param_manager.update_lessons( - curr_step, max_step, reward_buff - ) - else: - updated, param_must_reset = False, False + # Get the sizes of the reward buffers. + reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} + curr_step = {k: int(t.step) for (k, t) in self.trainers.items()} + max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} + # Attempt to increment the lessons of the brains who + # were ready. + updated, param_must_reset = self.param_manager.update_lessons( + curr_step, max_step, reward_buff + ) if updated: for trainer in self.trainers.values(): trainer.reward_buffer.clear() @@ -237,7 +232,7 @@ def reset_env_if_ready(self, env: EnvManager) -> None: if param_must_reset or ghost_controller_reset: self._reset_env(env) # This reset also sends the new config to env self.end_trainer_episodes() - elif updated and self.param_manager: + elif updated: env.set_env_parameters(self.param_manager.get_current_samplers()) @timed @@ -247,15 +242,14 @@ def advance(self, env: EnvManager) -> int: num_steps = env.advance() # Report current lesson for each environment parameter - if self.param_manager: - for ( - param_name, - lesson_number, - ) in self.param_manager.get_current_lesson_number().items(): - for trainer in self.trainers.values(): - trainer.stats_reporter.set_stat( - f"Environment/Lesson/{param_name}", lesson_number - ) + for ( + param_name, + lesson_number, + ) in self.param_manager.get_current_lesson_number().items(): + for trainer in self.trainers.values(): + trainer.stats_reporter.set_stat( + f"Environment/Lesson/{param_name}", lesson_number + ) for trainer in self.trainers.values(): if not trainer.threaded: diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 8ee90c3402..01fe654dbc 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Optional +from typing import Dict from mlagents_envs.logging_util import get_logger from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager @@ -24,8 +24,8 @@ def __init__( train_model: bool, load_model: bool, seed: int, + param_manager: EnvironmentParameterManager, init_path: str = None, - param_manager: Optional[EnvironmentParameterManager] = None, multi_gpu: bool = False, ): self.trainer_config = trainer_config @@ -47,8 +47,8 @@ def generate(self, brain_name: str) -> Trainer: self.load_model, self.ghost_controller, self.seed, - self.init_path, self.param_manager, + self.init_path, self.multi_gpu, ) @@ -61,8 +61,8 @@ def initialize_trainer( load_model: bool, ghost_controller: GhostController, seed: int, + param_manager: EnvironmentParameterManager, init_path: str = None, - 
param_manager: Optional[EnvironmentParameterManager] = None, multi_gpu: bool = False, ) -> Trainer: """ @@ -77,17 +77,15 @@ def initialize_trainer( :param load_model: Whether to load the model or randomly initialize :param ghost_controller: The object that coordinates ghost trainers :param seed: The random seed to use + :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer :param init_path: Path from which to load model, if different from model_path. - :param param_manager: Optional EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer :return: """ trainer_artifact_path = os.path.join(output_path, brain_name) if init_path is not None: trainer_settings.init_path = os.path.join(init_path, brain_name) - min_lesson_length = 1 - if param_manager: - min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) + min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) trainer: Trainer = None # type: ignore # will be set to one of these, or raise trainer_type = trainer_settings.trainer_type
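
For review purposes, the caller-side pattern this series converges on can be summarized outside the diff. The sketch below is illustrative only and is not part of the patch; it assumes the ml-agents package from this branch is importable, and the behavior name "Behavior" and the numeric values are placeholders. It mirrors the call sites touched in learn.py and trainer_controller.py: the manager is now constructed unconditionally (a missing environment_parameters section is treated as an empty dict), so trainers and the controller no longer carry an Optional/None code path for it.

# Illustrative sketch only -- not part of the patch series.
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager

# The manager is always created, even when no environment_parameters section
# is present in the run configuration (None settings become an empty dict).
param_manager = EnvironmentParameterManager(None, 0, restore=False)

# The controller queries the manager directly for the current sampler config.
initial_config = param_manager.get_current_samplers()

# Lesson progression: the controller passes per-behavior step counts, max steps,
# and reward buffers, and the manager reports whether any lesson changed and
# whether the environment must be reset ("Behavior" and the numbers are placeholders).
updated, must_reset = param_manager.update_lessons(
    trainer_steps={"Behavior": 1000},
    trainer_max_steps={"Behavior": 10000},
    trainer_reward_buffer={"Behavior": [1.0] * 100},
)
if must_reset:
    print("reset with config:", param_manager.get_current_samplers())
print("current lessons:", param_manager.get_current_lesson_number())

Making the manager non-optional is what allows the `if self.param_manager:` branches to be dropped from trainer_controller.py and the `min_lesson_length` fallback to be removed from trainer_util.py in the hunks above.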