
Commit 088cbe9

Develop add fire exp framework (#4213)
* Experiment branch for comparing torch
* Updates and merging ervin changes
* Improvements on experiment_torch.py
* Better printing of results
* Preliminary GPU experiment
* Testing GPU
* Prepare to see a lot of commits, because I like my IDE and I am testing on a server and I am using git to sync the two
* Attempt at GPU on tf. Does not work
* Fixing learn.py
1 parent b3ca0c9 commit 088cbe9

8 files changed: +195 additions, −41 deletions


experiment_torch.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@ (new file, all additions; shown without diff markers)

import json
import os
import torch
import tensorflow as tf
import argparse
from mlagents.trainers.learn import run_cli, parse_command_line
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.ppo.trainer import TestingConfiguration
from mlagents_envs.timers import _thread_timer_stacks


def run_experiment(name: str, steps: int, use_torch: bool, num_torch_threads: int, use_gpu: bool, num_envs: int = 1, config_name=None):
    # Configure the global test flags consumed by the trainer and learn.py.
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
    if use_gpu:
        tf.device("/GPU:0")
    else:
        tf.device("/device:CPU:0")
    if not torch.cuda.is_available() and use_gpu:
        # GPU requested but unavailable: return a row of "na" placeholders.
        return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na", "na", "na", "na", "na", "na", "na"
    if config_name is None:
        config_name = name
    run_options = parse_command_line([f"config/ppo/{config_name}.yaml", "--num-envs", f"{num_envs}"])
    run_options.checkpoint_settings.run_id = f"{name}_test_" + str(steps) + "_" + ("torch" if use_torch else "tf")
    run_options.checkpoint_settings.force = True
    # run_options.env_settings.num_envs = num_envs
    for trainer_settings in run_options.behaviors.values():
        trainer_settings.threaded = False
    timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)
    # Reset global state so consecutive runs in the same process do not leak.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()
    with open(timers_path) as timers_json_file:
        timers_json = json.load(timers_json_file)
        total = timers_json["total"]
        tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]
        evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
        update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
        tc_advance_total = tc_advance["total"]
        tc_advance_count = tc_advance["count"]
        if use_torch:
            update_total = update["TorchPPOOptimizer.update"]["total"]
            evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
            update_count = update["TorchPPOOptimizer.update"]["count"]
            evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
        else:
            update_total = update["TFPPOOptimizer.update"]["total"]
            evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
            update_count = update["TFPPOOptimizer.update"]["count"]
            evaluate_count = evaluate["NNPolicy.evaluate"]["count"]
        # todo: do total / count
        return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), str(total), str(tc_advance_total), str(tc_advance_count), str(update_total), str(update_count), str(evaluate_total), str(evaluate_count)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument("--gpu", default=False, action="store_true", help="If true, will use the GPU")
    parser.add_argument("--threads", default=False, action="store_true", help="If true, will try both 1 and 8 threads for torch")
    parser.add_argument("--ball", default=False, action="store_true", help="If true, will only do 3DBall")
    args = parser.parse_args()

    if args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "VisualHallway")]
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]

    labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")

    results = []
    results.append(labels)
    f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt", "w")
    f.write(" ".join(labels) + "\n")

    for env_config in envs_config_tuples:
        # torch, single thread
        data = run_experiment(name=env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
        results.append(data)
        f.write(" ".join(data) + "\n")

        # torch, 8 threads
        if args.threads:
            data = run_experiment(name=env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
            results.append(data)
            f.write(" ".join(data) + "\n")

        # tensorflow baseline
        data = run_experiment(name=env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
        results.append(data)
        f.write(" ".join(data) + "\n")
    for r in results:
        print(*r)
    f.close()


if __name__ == "__main__":
    main()
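For reference, the script is driven either from the command line (for example: python experiment_torch.py --steps 25000 --num-envs 2 --ball) or programmatically. A minimal usage sketch, with illustrative argument values that are not part of the commit:

    from experiment_torch import run_experiment

    # Compare torch against tf on 3DBall with 2 parallel environments.
    row = run_experiment(
        name="3DBall",
        steps=25000,
        use_torch=True,
        num_torch_threads=1,
        use_gpu=False,
        num_envs=2,
        config_name="3DBall",
    )
    print(" ".join(row))  # name, steps, flags, then timer totals and counts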

ml-agents/mlagents/trainers/learn.py

Lines changed: 24 additions & 10 deletions
@@ -35,6 +35,9 @@
 )
 from mlagents_envs import logging_util

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+from mlagents_envs.registry import default_registry
+
 logger = logging_util.get_logger(__name__)

 TRAINING_STATUS_FILE_NAME = "training_status.json"
@@ -233,16 +236,27 @@ def create_unity_environment(
     ) -> UnityEnvironment:
         # Make sure that each environment gets a different seed
         env_seed = seed + worker_id
-        return UnityEnvironment(
-            file_name=env_path,
-            worker_id=worker_id,
-            seed=env_seed,
-            no_graphics=no_graphics,
-            base_port=start_port,
-            additional_args=env_args,
-            side_channels=side_channels,
-            log_folder=log_folder,
-        )
+        if TestingConfiguration.env_name == "":
+            return UnityEnvironment(
+                file_name=env_path,
+                worker_id=worker_id,
+                seed=env_seed,
+                no_graphics=no_graphics,
+                base_port=start_port,
+                additional_args=env_args,
+                side_channels=side_channels,
+                log_folder=log_folder,
+            )
+        else:
+            return default_registry[TestingConfiguration.env_name].make(
+                seed=env_seed,
+                no_graphics=no_graphics,
+                base_port=start_port,
+                worker_id=worker_id,
+                additional_args=env_args,
+                side_channels=side_channels,
+                log_folder=log_folder,
+            )

     return create_unity_environment
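The registry branch above is what lets the experiment script run without a local environment binary. A minimal sketch of that code path, assuming network access so the registry can fetch the binary (the 3DBall key and keyword values are examples):

    from mlagents_envs.registry import default_registry

    # Downloads the registered 3DBall environment if needed, then launches it.
    env = default_registry["3DBall"].make(seed=42, no_graphics=True)
    env.reset()
    print(list(env.behavior_specs.keys()))  # behaviors exposed by the scene
    env.close()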

ml-agents/mlagents/trainers/models_torch.py

Lines changed: 3 additions & 2 deletions
@@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
             memories = torch.split(memories, self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding, memories)
+            embedding, memories = self.lstm(embedding.contiguous(), (memories[0].contiguous(), memories[1].contiguous()))
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories
@@ -407,7 +407,8 @@ def __init__(self, height, width, initial_channels, output_size):
     def forward(self, visual_obs):
         conv_1 = torch.relu(self.conv1(visual_obs))
         conv_2 = torch.relu(self.conv2(conv_1))
-        hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        hidden = torch.relu(self.dense(torch.reshape(conv_2, (-1, self.final_flat))))
         return hidden
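Both changes are GPU-safety fixes rather than behavior changes: the cuDNN-backed nn.LSTM wants contiguous input and hidden-state tensors, and Tensor.view only works on contiguous memory, whereas torch.reshape copies when it must. A small self-contained illustration (not from the commit):

    import torch

    x = torch.randn(2, 3, 4).permute(0, 2, 1)  # permute leaves x non-contiguous
    try:
        x.view(2, -1)                          # raises RuntimeError here
    except RuntimeError:
        y = torch.reshape(x, (2, -1))          # succeeds, copying if needed

    # nn.LSTM expects contiguous tensors, hence the explicit .contiguous() calls.
    lstm = torch.nn.LSTM(input_size=4, hidden_size=8)
    out, (hn, cn) = lstm(
        torch.randn(5, 2, 4).contiguous(),
        (torch.zeros(1, 2, 8).contiguous(), torch.zeros(1, 2, 8).contiguous()),
    )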

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py

Lines changed: 2 additions & 2 deletions
@@ -106,8 +106,8 @@ def get_trajectory_value_estimates(
         )

         for name, estimate in value_estimates.items():
-            value_estimates[name] = estimate.detach().numpy()
-            next_value_estimate[name] = next_value_estimate[name].detach().numpy()
+            value_estimates[name] = estimate.detach().cpu().numpy()
+            next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()

         if done:
             for k in next_value_estimate:
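This .detach().cpu().numpy() chain recurs throughout the commit: .numpy() only works on CPU tensors, so anything that may live on the GPU must be detached from the autograd graph and copied to host memory first. A minimal sketch:

    import torch

    t = torch.ones(3, requires_grad=True)
    if torch.cuda.is_available():
        t = t.to("cuda:0")

    arr = t.detach().cpu().numpy()  # safe whether t is on CPU or GPU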

ml-agents/mlagents/trainers/policy/nn_policy.py

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@
     MultiCategoricalDistribution,
 )

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-6  # Small value to avoid divide by zero

ml-agents/mlagents/trainers/policy/torch_policy.py

Lines changed: 38 additions & 24 deletions
@@ -17,6 +17,8 @@
 from mlagents.trainers.brain import BrainParameters
 from mlagents.trainers.models_torch import ActorCritic

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-7  # Small value to avoid divide by zero
@@ -91,6 +93,12 @@ def __init__(
         self.log_std_min = -20
         self.log_std_max = 2

+        if TestingConfiguration.device != "cpu":
+            torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        else:
+            torch.set_default_tensor_type(torch.FloatTensor)
+
+
         self.inference_dict: Dict[str, tf.Tensor] = {}
         self.update_dict: Dict[str, tf.Tensor] = {}
@@ -117,6 +125,8 @@ def __init__(
             separate_critic=self.use_continuous_act,
         )

+        self.actor_critic.to(TestingConfiguration.device)
+
     def split_decision_step(self, decision_requests):
         vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
         mask = None
@@ -190,18 +200,18 @@ def evaluate(
         action, log_probs, entropy, value_heads, memories = self.sample_actions(
             vec_obs, vis_obs, masks=masks, memories=memories
         )
-        run_out["action"] = action.detach().numpy()
-        run_out["pre_action"] = action.detach().numpy()
+        run_out["action"] = action.detach().cpu().numpy()
+        run_out["pre_action"] = action.detach().cpu().numpy()
         # Todo - make pre_action difference
-        run_out["log_probs"] = log_probs.detach().numpy()
-        run_out["entropy"] = entropy.detach().numpy()
+        run_out["log_probs"] = log_probs.detach().cpu().numpy()
+        run_out["entropy"] = entropy.detach().cpu().numpy()
         run_out["value_heads"] = {
-            name: t.detach().numpy() for name, t in value_heads.items()
+            name: t.detach().cpu().numpy() for name, t in value_heads.items()
         }
         run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
         run_out["learning_rate"] = 0.0
         if self.use_recurrent:
-            run_out["memories"] = memories.detach().numpy()
+            run_out["memories"] = memories.detach().cpu().numpy()
         self.actor_critic.update_normalization(vec_obs)
         return run_out
@@ -249,24 +259,28 @@ def load_model(self, step=0):
         self.actor_critic.load_state_dict(torch.load(load_path))

     def export_model(self, step=0):
-        fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
-        fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
-        fake_masks = torch.ones([1] + self.actor_critic.act_size)
-        # fake_memories = torch.zeros([1] + [self.m_size])
-        export_path = "./model-" + str(step) + ".onnx"
-        output_names = ["action", "action_probs"]
-        input_names = ["vector_observation", "action_mask"]
-        dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
-        onnx.export(
-            self.actor_critic,
-            (fake_vec_obs, fake_vis_obs, fake_masks),
-            export_path,
-            verbose=True,
-            opset_version=12,
-            input_names=input_names,
-            output_names=output_names,
-            dynamic_axes=dynamic_axes,
-        )
+        try:
+            fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
+            fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
+            fake_masks = torch.ones([1] + self.actor_critic.act_size)
+            # fake_memories = torch.zeros([1] + [self.m_size])
+            export_path = "./model-" + str(step) + ".onnx"
+            output_names = ["action", "action_probs"]
+            input_names = ["vector_observation", "action_mask"]
+            dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
+            onnx.export(
+                self.actor_critic,
+                (fake_vec_obs, fake_vis_obs, fake_masks),
+                export_path,
+                verbose=True,
+                opset_version=12,
+                input_names=input_names,
+                output_names=output_names,
+                dynamic_axes=dynamic_axes,
+            )
+        except:
+            print("Could not export torch model")
+            return

     @property
     def vis_obs_size(self):
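The device handling here has two halves: torch.set_default_tensor_type(torch.cuda.FloatTensor) makes newly constructed tensors live on the GPU, and actor_critic.to(...) moves the module's parameters there; presumably the dummy inputs built inside export_model can then land on an unexpected device, which is what the new try/except guards against. A minimal sketch of the placement pattern, with illustrative names not taken from the commit:

    import torch

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = torch.nn.Linear(4, 2).to(device)   # parameters now on `device`

    obs = torch.randn(1, 4, device=device)     # inputs created on the same device
    with torch.no_grad():
        action = model(obs)
    run_out = {"action": action.detach().cpu().numpy()}  # back to host for NumPy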

ml-agents/mlagents/trainers/ppo/optimizer_torch.py

Lines changed: 2 additions & 2 deletions
@@ -143,8 +143,8 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().numpy()),
-            "Losses/Value Loss": value_loss.detach().numpy(),
+            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
+            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
         }

         return update_stats

ml-agents/mlagents/trainers/ppo/trainer.py

Lines changed: 13 additions & 1 deletion
@@ -2,6 +2,14 @@
 # ## ML-Agent Learning (PPO)
 # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347

+class TestingConfiguration:
+    use_torch = False
+    max_steps = 0
+    env_name = ""
+    device = "cpu"
+
+
+
 from collections import defaultdict
 from typing import cast
@@ -22,6 +30,8 @@
 logger = get_logger(__name__)


+
+
 class PPOTrainer(RLTrainer):
     """The PPOTrainer is an implementation of the PPO algorithm."""
@@ -53,7 +63,9 @@ def __init__(
         )
         self.load = load
         self.seed = seed
-        self.framework = "torch"
+        self.framework = "torch" if TestingConfiguration.use_torch else "tf"
+        if TestingConfiguration.max_steps > 0:
+            self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self.policy: Policy = None  # type: ignore

     def _process_trajectory(self, trajectory: Trajectory) -> None:
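TestingConfiguration is plain class-level state, so setting its attributes before run_cli is enough to reroute environment creation, the framework choice, and the step cap. A minimal sketch mirroring what experiment_torch.py does:

    from mlagents.trainers.ppo.trainer import TestingConfiguration
    from mlagents.trainers.learn import run_cli, parse_command_line

    TestingConfiguration.env_name = "3DBall"  # resolved via default_registry
    TestingConfiguration.max_steps = 25000    # overrides the YAML max_steps
    TestingConfiguration.use_torch = True     # selects the torch framework
    TestingConfiguration.device = "cpu"

    run_cli(parse_command_line(["config/ppo/3DBall.yaml"]))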
