Skip to content

Instantly share code, notes, and snippets.

@jeguzzi
Created July 3, 2025 15:00
Show Gist options
  • Select an option

  • Save jeguzzi/8b6480d02f8924d441f4ddc27cb8074b to your computer and use it in GitHub Desktop.

Select an option

Save jeguzzi/8b6480d02f8924d441f4ddc27cb8074b to your computer and use it in GitHub Desktop.
from navground import sim
from navground.learning import ControlActionConfig, DefaultObservationConfig, GroupConfig
from navground.learning.parallel_env import parallel_env, make_vec_from_penv
from navground.learning.rewards import SocialReward
from stable_baselines3 import SAC
# 1. Load a scenario ("..." is a placeholder: supply a concrete scenario spec).
scenario = sim.load_scenario(...)

# 2. Create a training environment.
# Actions are (linear, angular) accelerations, bounded by the limits below.
action_config = ControlActionConfig(
    max_acceleration=1.0,
    max_angular_acceleration=10.0,
    use_acceleration_action=True)
# "..." is a placeholder: supply a concrete sensor spec.
sensor = sim.load_sensor(...)
# Observations: a flat vector combining target direction, velocity and angular speed.
observation_config = DefaultObservationConfig(
    include_target_direction=True,
    include_velocity=True,
    include_angular_speed=True,
    flat=True)
# The wheelchairs have indices 0, 1, 2, 3 and should be controlled by a policy.
group = GroupConfig(
    indices=(0, 1, 2, 3),
    action=action_config,
    observation=observation_config,
    sensor=sensor,
    reward=SocialReward(safety_margin=0.2))
# The other agents (i.e., humans) will be controlled by the original model-based behavior.
train_penv = parallel_env(
    scenario=scenario,
    groups=[group],
    time_step=0.1,
    max_duration=120,
    include_success=False)
# Wrap the parallel (multi-agent) env as a SB3-compatible vectorized env.
train_venv = make_vec_from_penv(train_penv, seed=0, monitor=True, monitor_keywords=())

# 3. Train a policy
sac = SAC("MlpPolicy", train_venv)
sac.learn(total_timesteps=300_000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment