Skip to content

Instantly share code, notes, and snippets.

@jeguzzi
Created July 3, 2025 15:00
Show Gist options
  • Select an option

  • Save jeguzzi/8b6480d02f8924d441f4ddc27cb8074b to your computer and use it in GitHub Desktop.

Select an option

Save jeguzzi/8b6480d02f8924d441f4ddc27cb8074b to your computer and use it in GitHub Desktop.
from navground import sim
from navground.learning import ControlActionConfig, DefaultObservationConfig, GroupConfig
from navground.learning.parallel_env import parallel_env, make_vec_from_penv
from navground.learning.rewards import SocialReward
from stable_baselines3 import SAC
# 1. Load a scenario ("..." is a placeholder: supply a concrete scenario spec).
scenario = sim.load_scenario(...)

# 2. Create a training environment.
# Actions are (linear, angular) accelerations, bounded by the limits below.
action_config = ControlActionConfig(
    max_acceleration=1.0,
    max_angular_acceleration=10.0,
    use_acceleration_action=True)
# "..." is a placeholder: supply a concrete sensor spec.
sensor = sim.load_sensor(...)
# Observations: a flat vector combining target direction, velocity and angular speed.
observation_config = DefaultObservationConfig(
    include_target_direction=True,
    include_velocity=True,
    include_angular_speed=True,
    flat=True)
# The wheelchairs have indices 0, 1, 2, 3 and should be controlled by a policy.
group = GroupConfig(
    indices=(0, 1, 2, 3),
    action=action_config,
    observation=observation_config,
    sensor=sensor,
    reward=SocialReward(safety_margin=0.2))
# The other agents (i.e., humans) will be controlled by the original model-based behavior.
train_penv = parallel_env(
    scenario=scenario,
    groups=[group],
    time_step=0.1,
    max_duration=120,
    include_success=False)
# Wrap the parallel (multi-agent) env as a SB3-compatible vectorized env.
train_venv = make_vec_from_penv(train_penv, seed=0, monitor=True, monitor_keywords=())

# 3. Train a policy
sac = SAC("MlpPolicy", train_venv)
sac.learn(total_timesteps=300_000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment