Skip to content

Commit 8bbf72e

Browse files
authored
Merge pull request #154 from itwasabhi/update_to_gymnasium
Switch to gymnasium in favor of openai gym
2 parents 3138131 + 06090f2 commit 8bbf72e

File tree

9 files changed

+145
-168
lines changed

9 files changed

+145
-168
lines changed

.github/workflows/pythonlint.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- uses: actions/checkout@v3
11-
- name: Set up Python 3.7
11+
- name: Set up Python 3.8
1212
uses: actions/setup-python@v4
1313
with:
14-
python-version: '3.7'
14+
python-version: 3.8
1515
architecture: 'x64'
1616
- name: Install dependencies
1717
run: |

.github/workflows/pythontests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ jobs:
2121
runs-on: ubuntu-latest
2222
steps:
2323
- uses: actions/checkout@v3
24-
- name: Set up Python 3.7
24+
- name: Set up Python 3.8
2525
uses: actions/setup-python@v4
2626
with:
27-
python-version: '3.7'
27+
python-version: 3.8
2828
architecture: 'x64'
2929
- name: Install dependencies
3030
run: |

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ $ ./run_tests.sh
9292

9393
⚠️**Be sure to change your CWD to the human_aware_rl directory before running the script, as the test script uses the CWD to dynamically generate a path to save temporary training runs/checkpoints. The testing script will fail if not being run from the correct directory.**
9494

95-
This will run all tests belonging to the human_aware_rl module. You can checkout the README in the submodule for instructions of running target-specific tests. This can be initiated from any directory.
95+
This will run all tests belonging to the human_aware_rl module. _These tests no longer work out of the box, due to package version issues_: if you fix them, feel free to open a PR. You can check out the README in the submodule for instructions on running target-specific tests. This can be initiated from any directory.
9696

9797
If you're thinking of using the planning code extensively, you should run the full testing suite that verifies all of the Overcooked accessory tools (this can take 5-10 mins):
9898
```

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
"numpy",
4343
"scipy",
4444
"tqdm",
45-
"gym",
46-
"pettingzoo",
45+
"gymnasium",
4746
"ipython",
4847
"pygame",
4948
"ipywidgets",

src/human_aware_rl/imitation/behavior_cloning_tf2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config):
474474
"""
475475
RLLib compatible constructor for initializing a behavior cloning model
476476
477-
observation_space (gym.Space|tuple) Shape of the featurized observations
478-
action_space (gym.space|tuple) Shape of the action space (len(Action.All_ACTIONS),)
477+
observation_space (gymnasium.Space|tuple) Shape of the featurized observations
478+
action_space (gymnasium.Space|tuple) Shape of the action space (len(Action.ALL_ACTIONS),)
479479
config (dict) Dictionary of relevant bc params
480480
- model_dir (str) Path to pickled keras.Model used to map observations to action logits
481481
- stochastic (bool) Whether action should return logit argmax or sample over distribution
@@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config):
519519
self.context = self._create_execution_context()
520520

521521
def _setup_shapes(self):
522-
# This is here to make the class compatible with both tuples or gym.Space objs for the spaces
522+
# This is here to make the class compatible with both tuples or gymnasium.Space objs for the spaces
523523
# Note: action_space = (len(Action.ALL_ACTIONS),) is technically NOT the action space shape, which would be () since actions are scalars
524524
self.observation_shape = (
525525
self.observation_space

src/human_aware_rl/rllib/rllib.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from datetime import datetime
77

88
import dill
9-
import gym
9+
import gymnasium
1010
import numpy as np
1111
import ray
1212
from ray.rllib.agents.ppo import PPOTrainer
@@ -32,8 +32,8 @@
3232
OvercookedGridworld,
3333
)
3434

35-
action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
36-
obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
35+
action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
36+
obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS))
3737
timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
3838

3939

@@ -218,9 +218,13 @@ def _validate_schedule(self, schedule):
218218
def _setup_action_space(self, agents):
219219
action_sp = {}
220220
for agent in agents:
221-
action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
222-
self.action_space = gym.spaces.Dict(action_sp)
223-
self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))
221+
action_sp[agent] = gymnasium.spaces.Discrete(
222+
len(Action.ALL_ACTIONS)
223+
)
224+
self.action_space = gymnasium.spaces.Dict(action_sp)
225+
self.shared_action_space = gymnasium.spaces.Discrete(
226+
len(Action.ALL_ACTIONS)
227+
)
224228

225229
def _setup_observation_space(self, agents):
226230
dummy_state = self.base_env.mdp.get_standard_start_state()
@@ -232,7 +236,7 @@ def _setup_observation_space(self, agents):
232236

233237
high = np.ones(obs_shape) * float("inf")
234238
low = np.ones(obs_shape) * 0
235-
self.ppo_observation_space = gym.spaces.Box(
239+
self.ppo_observation_space = gymnasium.spaces.Box(
236240
np.float32(low), np.float32(high), dtype=np.float32
237241
)
238242

@@ -243,7 +247,7 @@ def _setup_observation_space(self, agents):
243247
obs_shape = featurize_fn_bc(dummy_state)[0].shape
244248
high = np.ones(obs_shape) * 100
245249
low = np.ones(obs_shape) * -100
246-
self.bc_observation_space = gym.spaces.Box(
250+
self.bc_observation_space = gymnasium.spaces.Box(
247251
np.float32(low), np.float32(high), dtype=np.float32
248252
)
249253
# hardcode mapping between action space and agent
@@ -253,7 +257,7 @@ def _setup_observation_space(self, agents):
253257
ob_space[agent] = self.ppo_observation_space
254258
else:
255259
ob_space[agent] = self.bc_observation_space
256-
self.observation_space = gym.spaces.Dict(ob_space)
260+
self.observation_space = gymnasium.spaces.Dict(ob_space)
257261

258262
def _get_featurize_fn(self, agent_id):
259263
if agent_id.startswith("ppo"):

src/overcooked_ai_py/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from gym.envs.registration import register
1+
from gymnasium.envs.registration import register
22

33
register(
44
id="Overcooked-v0",

0 commit comments

Comments
 (0)