[WIP] Demonstration provider #4988
Changes from all commits: eb4821e, 6181acb, c4d0852, b934340, d95b126, 46f99e6, 92377b5
`mlagents/trainers/demonstrations/demonstration_proto_utils.py` (new file, +96 lines; the path is taken from the imports in the provider file below):

```python
import os
from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents.trainers.trajectory import ObsUtil
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto
from mlagents_envs.base_env import BehaviorSpec
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.timers import timed, hierarchical_timer
from google.protobuf.internal.decoder import _DecodeVarint32  # type: ignore
from google.protobuf.internal.encoder import _EncodeVarint  # type: ignore


INITIAL_POS = 33
SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1])


@timed
def load_demonstration(
    file_paths: List[str],
) -> Tuple[BehaviorSpec, List[AgentInfoActionPairProto]]:
    """
    Loads and parses one or more demonstration files.
    :param file_paths: Locations of demonstration files (.demo).
    :return: BehaviorSpec and list of AgentInfoActionPairProto containing
             demonstration data.
    """

    # First 32 bytes of file dedicated to meta-data.
    behavior_spec = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            # Messages are varint-length-delimited: the first is a
            # DemonstrationMetaProto, the second a BrainParametersProto,
            # and the rest are AgentInfoActionPairProtos.
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    if (
                        meta_data_proto.api_version
                        not in SUPPORTED_DEMONSTRATION_VERSIONS
                    ):
                        raise RuntimeError(
                            f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
                        )
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if behavior_spec is None:
                        behavior_spec = behavior_spec_from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    if not behavior_spec:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file(s) at {file_paths}."
        )
    return behavior_spec, info_action_pairs


def write_delimited(f, message):
    msg_string = message.SerializeToString()
    msg_size = len(msg_string)
    _EncodeVarint(f.write, msg_size)
    f.write(msg_string)


def write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos):
    with open(demo_path, "wb") as f:
        # write metadata
        write_delimited(f, meta_data_proto)
        f.seek(INITIAL_POS)
        write_delimited(f, brain_param_proto)

        for agent in agent_info_protos:
            write_delimited(f, agent)
```
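For context, a minimal usage sketch of these helpers. Only `load_demonstration`, `write_demo`, and the metadata fields `api_version`/`number_steps` come from the code above; the file paths are hypothetical placeholders:

```python
from mlagents.trainers.demonstrations.demonstration_proto_utils import (
    load_demonstration,
    write_demo,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)

# "expert.demo" is a placeholder path, not a file shipped with the repo.
behavior_spec, pairs = load_demonstration(["expert.demo"])
print(f"Loaded {len(pairs)} AgentInfoActionPair messages")

# Writing uses the same varint-delimited layout: metadata first, then the
# BrainParametersProto at byte offset INITIAL_POS, then one message per step.
meta = DemonstrationMetaProto()
meta.api_version = 1            # must be in SUPPORTED_DEMONSTRATION_VERSIONS
meta.number_steps = len(pairs)  # load_demonstration stops after this many pairs
# load_demonstration parses a BrainParametersProto internally but does not
# return it, so `brain_params` is assumed to be obtained elsewhere:
# write_demo("copy.demo", meta, brain_params, pairs)
```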
`mlagents/trainers/demonstrations/demonstration_provider.py` (new file, +72 lines):

```python
import abc

import numpy as np

from typing import List, NamedTuple

from mlagents_envs.base_env import ActionTuple, BehaviorSpec

from mlagents.trainers.buffer import AgentBuffer, BufferKey
from mlagents.trainers.trajectory import ObsUtil


class DemonstrationExperience(NamedTuple):
    obs: List[np.ndarray]
    reward: float
    done: bool
    action: ActionTuple
    prev_action: np.ndarray
    interrupted: bool


class DemonstrationTrajectory(NamedTuple):
    experiences: List[DemonstrationExperience]

    def to_agentbuffer(self) -> AgentBuffer:
        """
        Converts the trajectory to an AgentBuffer. Note that the length of the
        AgentBuffer will be one less than the trajectory, as the next
        observation needs to be populated from the last step of the trajectory.
        """
        agent_buffer_trajectory = AgentBuffer()
        for exp in self.experiences:
            for i, obs in enumerate(exp.obs):
                agent_buffer_trajectory[ObsUtil.get_name_at(i)].append(obs)

            # TODO Not in demo_loader
            agent_buffer_trajectory[BufferKey.MASKS].append(1.0)
            agent_buffer_trajectory[BufferKey.DONE].append(exp.done)

            agent_buffer_trajectory[BufferKey.CONTINUOUS_ACTION].append(
                exp.action.continuous
            )
            agent_buffer_trajectory[BufferKey.DISCRETE_ACTION].append(
                exp.action.discrete
            )

            agent_buffer_trajectory[BufferKey.PREV_ACTION].append(exp.prev_action)
            agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS].append(exp.reward)

        return agent_buffer_trajectory


class DemonstrationProvider(abc.ABC):
    @abc.abstractmethod
    def get_behavior_spec(self) -> BehaviorSpec:
        pass

    @abc.abstractmethod
    def pop_trajectories(self) -> List[DemonstrationTrajectory]:
        pass

    def to_agentbuffer(self, training_length: int) -> AgentBuffer:
        buffer_out = AgentBuffer()
        trajectories = self.pop_trajectories()
        for trajectory in trajectories:
            temp_buffer = trajectory.to_agentbuffer()
            temp_buffer.resequence_and_append(
                buffer_out, batch_size=None, training_length=training_length
            )
        return buffer_out
```

Review comments on `class DemonstrationExperience`:

> These are trimmed down versions of the AgentExperience and Trajectory classes, based on what's currently in demo_loader.

> Hmm, I feel like we shouldn't duplicate the conversion code here between AgentExperience and Trajectory (esp. with the teammate observations coming in, it becomes quite a fat function - and at some point I imagine we'll have teammate demonstrations as well). Wonder if we can have a BaseAgentExperience be the base class that is used here and in the AgentProcessor, and have the AgentExperience (PolicyAgentExperience?) inherit from it? Or some other way of composing these two. (See the sketch below.)
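To make that suggestion concrete, here is one possible shape, as a non-authoritative sketch. `BaseAgentExperience` and `PolicyAgentExperience` are the names floated in the comment, the policy-side extras are assumptions, and dataclasses are used because `typing.NamedTuple` cannot be subclassed with additional fields:

```python
from dataclasses import dataclass
from typing import List, Optional

import numpy as np

from mlagents_envs.base_env import ActionTuple


@dataclass
class BaseAgentExperience:
    # Fields shared by demonstration and policy experiences (taken from the
    # DemonstrationExperience definition above).
    obs: List[np.ndarray]
    reward: float
    done: bool
    action: ActionTuple
    prev_action: np.ndarray
    interrupted: bool


@dataclass
class PolicyAgentExperience(BaseAgentExperience):
    # Hypothetical policy-only extras; the real AgentExperience carries
    # additional fields such as log-probs and memories.
    action_probs: Optional[np.ndarray] = None
    memory: Optional[np.ndarray] = None
```

The trajectory-to-buffer conversion could then live in one place, keyed off `BaseAgentExperience`, instead of being duplicated between demo_loader and the AgentProcessor path.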
New file, +146 lines (the capture does not show its path; the class name suggests `mlagents/trainers/demonstrations/local_demonstration_provider.py`):

```python
import os
from typing import List
import numpy as np


from mlagents_envs.base_env import ActionTuple, BehaviorSpec, ActionSpec
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
    AgentInfoActionPairProto,
)
from mlagents_envs.rpc_utils import steps_from_proto


from mlagents.trainers.demonstrations.demonstration_provider import (
    DemonstrationProvider,
    DemonstrationExperience,
    DemonstrationTrajectory,
)
from mlagents.trainers.demonstrations.demonstration_proto_utils import (
    load_demonstration,
)


class LocalDemonstrationProvider(DemonstrationProvider):
    def __init__(self, file_path: str):
        super().__init__()

        demo_paths = self._get_demo_files(file_path)
        behavior_spec, info_action_pairs = load_demonstration(demo_paths)
        self._behavior_spec = behavior_spec
        self._info_action_pairs = info_action_pairs

    def get_behavior_spec(self) -> BehaviorSpec:
        return self._behavior_spec

    def pop_trajectories(self) -> List[DemonstrationTrajectory]:
        trajectories = LocalDemonstrationProvider._info_action_pairs_to_trajectories(
            self._behavior_spec, self._info_action_pairs
        )
        self._info_action_pairs = []
        return trajectories

    @staticmethod
    def _get_demo_files(path: str) -> List[str]:
        """
        Retrieves the demonstration file(s) from a path.
        :param path: Path of demonstration file or directory.
        :return: List of demonstration files.

        Raises errors if |path| is invalid.
        """
        if os.path.isfile(path):
            if not path.endswith(".demo"):
                raise ValueError("The path provided is not a '.demo' file.")
            return [path]
        elif os.path.isdir(path):
            paths = [
                os.path.join(path, name)
                for name in os.listdir(path)
                if name.endswith(".demo")
            ]
            if not paths:
                raise ValueError(
                    "There are no '.demo' files in the provided directory."
                )
            return paths
        else:
            raise FileNotFoundError(
                f"The demonstration file or directory {path} does not exist."
            )

    @staticmethod
    def _info_action_pairs_to_trajectories(
        behavior_spec: BehaviorSpec, info_action_pairs: List[AgentInfoActionPairProto]
    ) -> List[DemonstrationTrajectory]:
        trajectories_out: List[DemonstrationTrajectory] = []
        current_experiences = []
        previous_action = np.zeros(
            behavior_spec.action_spec.continuous_size, dtype=np.float32
        )  # TODO or discrete?
        for pair_index, pair in enumerate(info_action_pairs):

            # Extract the observations from the decision/terminal steps
            current_decision_step, current_terminal_step = steps_from_proto(
                [pair.agent_info], behavior_spec
            )
            if len(current_terminal_step) == 1:
                obs = list(current_terminal_step.values())[0].obs
            else:
                obs = list(current_decision_step.values())[0].obs

            action_tuple = LocalDemonstrationProvider._get_action_tuple(
                pair, behavior_spec.action_spec
            )

            exp = DemonstrationExperience(
                obs=obs,
                reward=pair.agent_info.reward,  # TODO next step's reward?
                done=pair.agent_info.done,
                action=action_tuple,
                prev_action=previous_action,
                interrupted=pair.agent_info.max_step_reached,
            )
            current_experiences.append(exp)
            previous_action = np.array(
                pair.action_info.vector_actions_deprecated, dtype=np.float32
            )
            if pair.agent_info.done or pair_index == len(info_action_pairs) - 1:
                trajectories_out.append(
                    DemonstrationTrajectory(experiences=current_experiences)
                )
                current_experiences = []

        return trajectories_out

    @staticmethod
    def _get_action_tuple(
        pair: AgentInfoActionPairProto, action_spec: ActionSpec
    ) -> ActionTuple:
        continuous_actions = None
        discrete_actions = None

        if (
            len(pair.action_info.continuous_actions) == 0
            and len(pair.action_info.discrete_actions) == 0
        ):
            # Older demos only populate the deprecated combined action field;
            # route it to continuous or discrete based on the action spec.
            if action_spec.continuous_size > 0:
                continuous_actions = pair.action_info.vector_actions_deprecated
            else:
                discrete_actions = pair.action_info.vector_actions_deprecated
        else:
            if action_spec.continuous_size > 0:
                continuous_actions = pair.action_info.continuous_actions
            if action_spec.discrete_size > 0:
                discrete_actions = pair.action_info.discrete_actions

        # TODO 2D?
        continuous_np = (
            np.array(continuous_actions, dtype=np.float32)
            if continuous_actions
            else None
        )
        discrete_np = (
            np.array(discrete_actions, dtype=np.float32) if discrete_actions else None
        )

        return ActionTuple(continuous_np, discrete_np)
```

Review comment on `pop_trajectories`:

> Need to add docstrings here. But the idea is that GAIL, etc. could be converted to use pop_trajectories() directly. Then, if we want DemonstrationProviders to be able to load new demonstrations on the fly, the logic can be kept in the DemonstrationProvider and the consumer doesn't need to know about it; it just gets a fresh batch of trajectories. (A consumer sketch follows at the end.)

Review comment on `_get_demo_files`:

> from demo_loader.get_demo_files
Review comment:

> was demo_loader.load_demonstration
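Building on the pop_trajectories() idea from the review, a minimal consumer sketch. The import path and the "demos/" directory are hypothetical placeholders; only `LocalDemonstrationProvider` and its methods come from the diff above:

```python
from mlagents.trainers.demonstrations.local_demonstration_provider import (
    LocalDemonstrationProvider,
)

# "demos/" is a placeholder directory of .demo files.
provider = LocalDemonstrationProvider("demos/")

spec = provider.get_behavior_spec()
print(f"Continuous action size: {spec.action_spec.continuous_size}")

# A GAIL-style consumer just asks for a fresh batch of trajectories; any
# on-the-fly reloading stays inside the provider.
for trajectory in provider.pop_trajectories():
    demo_buffer = trajectory.to_agentbuffer()
    # hand demo_buffer to the reward signal / BC update here

# Or flatten everything into a single buffer for training:
full_buffer = provider.to_agentbuffer(training_length=1)
```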