Save and Restore States
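It is possible to save the state of the entire simulation environment and restore it later. This is useful if your application requires lookahead search. Below is an example of a greedy random search: at each step, several random actions are tried from the same saved state, and the one that yields the best immediate reward is then executed.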
import gymnasium as gym
import numpy as np
import panda_gym
# Greedy random search: at every step, try a handful of random actions from
# the same saved simulation state and commit to the one with the best reward.
env = gym.make("PandaReachDense-v3", render_mode="human")
observation, info = env.reset()

for _ in range(1000):
    # Snapshot the simulation so every candidate action starts from the same state.
    state_id = env.save_state()

    # Sample 5 actions and choose the one that yields the best reward.
    best_reward = -np.inf
    best_action = None
    for _ in range(5):
        env.restore_state(state_id)
        action = env.action_space.sample()
        # Only the reward matters for a candidate; the observation is discarded.
        _, reward, _, _, _ = env.step(action)
        if reward > best_reward:
            best_reward = reward
            best_action = action

    # Rewind to the snapshot before committing to the chosen action.
    env.restore_state(state_id)
    env.remove_state(state_id)  # discard the state, as it is no longer needed

    # Step with the best action
    observation, reward, terminated, truncated, info = env.step(best_action)

    # An episode ends on termination OR truncation (e.g. a time limit);
    # Gymnasium requires a reset in both cases before stepping again.
    if terminated or truncated:
        observation, info = env.reset()

env.close()