我试图模仿最简单的交错游戏,你必须预测它是否会成为头盔。 可悲的是,鉴于我:
Using cpu device
Traceback (most recent call last):
File "/home/user/python/simplegame.py", line 40, in <module>
model.learn(total_timesteps=10000)
File "/home/user/python/mypython3.10/lib/python3.10/site-packages/stable_baselines3/ppo/ppo.py", line 315, in learn
return super().learn(
File "/home/user/python/mypython3.10/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 264, in learn
total_timesteps, callback = self._setup_learn(
File "/home/user/python/mypython3.10/lib/python3.10/site-packages/stable_baselines3/common/base_class.py", line 423, in _setup_learn
self._last_obs = self.env.reset() # type: ignore[assignment]
File "/home/user/python/mypython3.10/lib/python3.10/site-packages/stable_baselines3/common/vec_env/dummy_vec_env.py", line 77, in reset
obs, self.reset_infos[env_idx] = self.envs[env_idx].reset(seed=self._seeds[env_idx], **maybe_options)
TypeError: CoinFlipEnv.reset() got an unexpected keyword argument seed
该守则是:
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
class CoinFlipEnv(gym.Env):
def __init__(self, heads_probability=0.8):
super(CoinFlipEnv, self).__init__()
self.action_space = gym.spaces.Discrete(2) # 0 for heads, 1 for tails
self.observation_space = gym.spaces.Discrete(2) # 0 for heads, 1 for tails
self.heads_probability = heads_probability
self.flip_result = None
def reset(self):
# Reset the environment
self.flip_result = None
return self._get_observation()
def step(self, action):
# Perform the action (0 for heads, 1 for tails)
self.flip_result = int(np.random.rand() < self.heads_probability)
# Compute the reward (1 for correct prediction, -1 for incorrect)
reward = 1 if self.flip_result == action else -1
# Return the observation, reward, done, and info
return self._get_observation(), reward, True, {}
def _get_observation(self):
# Return the current coin flip result
return self.flip_result
# Create the environment with heads probability of 0.8
env = DummyVecEnv([lambda: CoinFlipEnv(heads_probability=0.8)])
# Create the PPO model
model = PPO("MlpPolicy", env, verbose=1)
# Train the model
model.learn(total_timesteps=10000)
# Save the model
model.save("coin_flip_model")
# Evaluate the model
obs = env.reset()
for _ in range(10):
action, _states = model.predict(obs)
obs, rewards, dones, info = env.step(action)
print(f"Action: {action}, Observation: {obs}, Reward: {rewards}")
我做了什么错误?
页: 1